summaryrefslogtreecommitdiff
path: root/spamfilter/Utilities
diff options
context:
space:
mode:
authorGluzskiy Alexandr <sss123next@list.ru>2009-10-13 05:04:06 +0300
committerGluzskiy Alexandr <sss123next@list.ru>2009-10-13 05:04:06 +0300
commit227022d9ed977c75196725502847e0b371e4e879 (patch)
tree6fe79f5ae836fe4a974db459553eb6b46a1bf8eb /spamfilter/Utilities
parent23d6d3e482927c13294f204b34ce23c6f445e8ac (diff)
spamfilter branchHEADmaster
Diffstat (limited to 'spamfilter/Utilities')
-rw-r--r--spamfilter/Utilities/DebugHelper.h338
-rw-r--r--spamfilter/Utilities/PCRE/bin/pcre.dllbin0 -> 183313 bytes
-rw-r--r--spamfilter/Utilities/PCRE/bin/pcreposix.dllbin0 -> 175142 bytes
-rw-r--r--spamfilter/Utilities/PCRE/include/pcre.h239
-rw-r--r--spamfilter/Utilities/PCRE/include/pcreposix.h117
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcre-bcc.libbin0 -> 2560 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcre.def26
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcre.dll.abin0 -> 15942 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcre.libbin0 -> 6514 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcreposix-bcc.libbin0 -> 2048 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcreposix.def16
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcreposix.dll.abin0 -> 9690 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcreposix.libbin0 -> 4386 bytes
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre.3.html174
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_compile.3.html67
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_config.3.html56
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_copy_named_substring.3.html48
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_copy_substring.3.html47
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_exec.3.html71
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_free_substring.3.html37
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_free_substring_list.3.html37
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_fullinfo.3.html67
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_get_named_substring.3.html48
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_get_stringnumber.3.html42
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_get_substring.3.html47
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_get_substring_list.3.html47
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_info.3.html35
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_maketables.3.html39
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_study.3.html49
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_version.3.html36
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcreapi.3.html1069
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcrebuild.3.html167
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcrecallout.3.html148
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcrecompat.3.html115
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcregrep.1.html147
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcrepattern.3.html1268
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcreperform.3.html86
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcreposix.3.html187
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcresample.3.html72
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcretest.1.html433
-rw-r--r--spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.mft58
-rw-r--r--spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.ver2
-rw-r--r--spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.mft12
-rw-r--r--spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.ver2
-rw-r--r--spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.373
-rw-r--r--spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.c191
-rw-r--r--spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.h35
-rw-r--r--spamfilter/Utilities/UnicoWS/license.txt66
-rw-r--r--spamfilter/Utilities/UnicoWS/redist.txt12
-rw-r--r--spamfilter/Utilities/UnicoWS/unicows.dllbin0 -> 258352 bytes
-rw-r--r--spamfilter/Utilities/UnicoWS/unicows.libbin0 -> 2325412 bytes
-rw-r--r--spamfilter/Utilities/UnicoWS/unicows.pdbbin0 -> 347136 bytes
52 files changed, 5826 insertions, 0 deletions
diff --git a/spamfilter/Utilities/DebugHelper.h b/spamfilter/Utilities/DebugHelper.h
new file mode 100644
index 0000000..cf1b4b3
--- /dev/null
+++ b/spamfilter/Utilities/DebugHelper.h
@@ -0,0 +1,338 @@
+/*
+
+DebugHelper.h
+Copyright © 2004-2006 Heiko Herkenrath
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+// Version 3.0, Sept 26 2005
+// Heiko Herkenrath
+
+// -----------------------------
+
+// Switches:
+
+//#define DEACTIVATE_DEBUG_HELPER
+//#define NO_DEBUG_HELPERS
+//#define ALSO_USE_DEBUG_HELPER_FOR_RELEASE
+
+// -----------------------------
+
+#if !defined(ALSO_USE_DEBUG_HELPER_FOR_RELEASE)
+ #if !defined(_DEBUG)
+ #define DEACTIVATE_DEBUG_HELPER
+ #endif
+#endif
+
+// -----------------------------
+
+#ifndef NO_DEBUG_HELPERS
+
+ #define DEBUG_BUFFER_SIZE 1024
+
+ // Creating display text for all functions
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+ // Builds the text used by the BOX/STR/LOG helpers: a header holding the
+ // running call counter, the calling thread's last Win32 error (code and
+ // system description) and the call site, followed by the caller's
+ // printf-style message.
+ //
+ //   pszFmt   printf-style format of the message; NULL makes the function return NULL
+ //   pszFile  source file of the call site (only its file name part is printed)
+ //   uLine    source line of the call site
+ //   va       arguments consumed by pszFmt
+ //
+ // Returns a malloc()ed string of at most DEBUG_BUFFER_SIZE characters which
+ // the caller must free(), or NULL if pszFmt is NULL or the allocation failed.
+ // Side effects: plays the MB_ICONQUESTION sound on every call and, unless
+ // pszFmt is NULL, resets the thread's last error to ERROR_SUCCESS.
+ __inline TCHAR* CreateDebugText(const TCHAR* pszFmt, const TCHAR* pszFile, unsigned int uLine, va_list va)
+ {
+   static int iDebugCallCount = 0;
+   TCHAR* pszDebugText;
+   TCHAR* ptszLastError = NULL;
+   DWORD dwLastError;
+   int cchHeader;
+   #if defined(UNICODE)
+     char* pszLastError = NULL;
+   #endif
+
+   // Read the last error before any other API call can overwrite it
+   dwLastError = GetLastError();
+   iDebugCallCount++;
+   MessageBeep(MB_ICONQUESTION);
+
+   if (!pszFmt) return NULL;
+
+   pszDebugText = (TCHAR*)malloc(DEBUG_BUFFER_SIZE*sizeof(TCHAR));
+   if (pszDebugText)
+   {
+     // The header is formatted on its own so that the error description and the
+     // file name are only ever passed as arguments, never as part of a format
+     // string. FORMAT_MESSAGE_IGNORE_INSERTS is required because no insert
+     // arguments are supplied for system messages that contain %1, %2, ...
+
+     #if defined(UNICODE) // FormatMessageW does not work with UnicoWS layer on Win9x/ME
+
+       if (FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS, NULL, dwLastError, 0, (LPWSTR)&ptszLastError, 0, NULL) == 0)
+         FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS, NULL, dwLastError, 0, (LPSTR)&pszLastError, 0, NULL);
+
+       mir_sntprintf(pszDebugText, DEBUG_BUFFER_SIZE, _T("[Message %i]\r\nLast Error: %u\r\nDescription: %hs%sCall: %s, %u\r\n\r\n"), iDebugCallCount, dwLastError, pszLastError?pszLastError:"", ptszLastError?ptszLastError:_T(""), PathFindFileName(pszFile), uLine);
+
+       if (pszLastError) LocalFree(pszLastError);
+
+     #else
+
+       FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS, NULL, dwLastError, 0, (LPTSTR)&ptszLastError, 0, NULL);
+       mir_sntprintf(pszDebugText, DEBUG_BUFFER_SIZE, _T("[Message %i]\r\nLast Error: %u\r\nDescription: %sCall: %s, %u\r\n\r\n"), iDebugCallCount, dwLastError, ptszLastError?ptszLastError:_T(""), PathFindFileName(pszFile), uLine);
+
+     #endif
+
+     if (ptszLastError) LocalFree(ptszLastError);
+
+     // Append the caller's message behind the header, using the TCHAR
+     // formatter (mir_vsnprintf only accepts char buffers)
+     cchHeader = lstrlen(pszDebugText);
+     mir_vsntprintf(pszDebugText+cchHeader, DEBUG_BUFFER_SIZE-cchHeader, pszFmt, va);
+   }
+
+   SetLastError(ERROR_SUCCESS);
+   return pszDebugText;
+ }
+ #endif
+
+
+ // Check if a specific flag is bitwise-or'ed into a flags variable
+ // (returns flag name as string or empty string if not contained)
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+ // Evaluates to the string "|<flag name>" when `flag` is set in `flags`, and
+ // to an empty string otherwise.
+ // Side effect: when the flag is set it is also removed from `flags`
+ // (flags ^= flag), so `flags` must be a modifiable lvalue; the "||TRUE"
+ // keeps the condition true even if that leaves `flags` at zero.
+ // NOTE: `flags` and `flag` are not parenthesized in the expansion, so pass
+ // simple identifiers/constants rather than compound expressions.
+ #define INFLAGS(flags, flag) ( ((flags&flag) && ((flags^=flag)||TRUE))?_T("|"#flag):_T(""))
+ #else
+ #define INFLAGS(flags, flag)
+ #endif
+
+
+ // BOOLSTR [make a string out of a boolean value]
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+ #define BOOLSTR(b) ((b)?_T("TRUE"):_T("FALSE"))
+ #else
+ #define BOOLSTR(b)
+ #endif
+
+
+ // BOX [show debug message box with text]
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+
+ // Functions:
+ #define BOX(str) BOX_Helper(_T(__FILE__), __LINE__, _T("%s"), _T(str))
+ #define BOX1(fmt, p1) BOX_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), NULL, NULL, NULL)
+ #define BOX2(fmt, p1, p2) BOX_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), NULL, NULL)
+ #define BOX3(fmt, p1, p2, p3) BOX_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), (p3), NULL)
+ #define BOX4(fmt, p1, p2, p3, p4) BOX_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), (p3), (p4))
+ // ---
+
+ // Formats a debug message via CreateDebugText() and presents it in a
+ // task-modal message box titled "Debug". Holding CTRL while the call is made
+ // suppresses the box. Nothing is shown if no text could be created.
+ //
+ //   pszFile  source file of the call site
+ //   uLine    source line of the call site
+ //   pszFmt   printf-style format, followed by its arguments
+ __inline void BOX_Helper(const TCHAR* pszFile, unsigned int uLine, const TCHAR* pszFmt, ...)
+ {
+   TCHAR* pszMessage;
+   va_list args;
+
+   va_start(args, pszFmt);
+   pszMessage = CreateDebugText(pszFmt, pszFile, uLine, args);
+   va_end(args);
+
+   if (!pszMessage)
+     return;
+
+   // High bit set means the CTRL key is currently down: skip the box then
+   if ((GetAsyncKeyState(VK_CONTROL)&0x8000) == 0)
+     MessageBoxEx(NULL, pszMessage, _T("Debug"), MB_OK|MB_TASKMODAL|MB_SETFOREGROUND, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT));
+
+   // The text was allocated by CreateDebugText() and is owned by us
+   free(pszMessage);
+ }
+
+ #else
+ #define BOX(str)
+ #define BOX1(fmt, p1)
+ #define BOX2(fmt, p1, p2)
+ #define BOX3(fmt, p1, p2, p3)
+ #define BOX4(fmt, p1, p2, p3, p4)
+ #endif
+
+
+
+ // STEP [show debug message box with current code step]
+ #define STEP(section, id) BOX2("Reached Step %s|%u", _T(section), id)
+
+
+
+ // STR [write line into debugger output]
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+
+ // Functions:
+ // STR..STR4 forward the call site (__FILE__/__LINE__), a literal format and
+ // up to four arguments to STR_Helper. The fixed-arity variants pad the
+ // argument list with NULL up to four arguments, like BOXn and LOGn do.
+ #define STR(str) STR_Helper(_T(__FILE__), __LINE__, _T("%s"), _T(str))
+ #define STR1(fmt, p1) STR_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), NULL, NULL, NULL)
+ #define STR2(fmt, p1, p2) STR_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), NULL, NULL)
+ #define STR3(fmt, p1, p2, p3) STR_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), (p3), NULL)
+ #define STR4(fmt, p1, p2, p3, p4) STR_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), (p3), (p4))
+ // ---
+
+ // Formats a debug message via CreateDebugText() and sends it, followed by a
+ // CR/LF pair, to the debugger output. Does nothing if no text could be
+ // created.
+ //
+ //   pszFile  source file of the call site
+ //   uLine    source line of the call site
+ //   pszFmt   printf-style format, followed by its arguments
+ __inline void STR_Helper(const TCHAR* pszFile, unsigned int uLine, const TCHAR* pszFmt, ...)
+ {
+   TCHAR* pszMessage;
+   va_list args;
+
+   va_start(args, pszFmt);
+   pszMessage = CreateDebugText(pszFmt, pszFile, uLine, args);
+   va_end(args);
+
+   if (!pszMessage)
+     return;
+
+   OutputDebugString(pszMessage);
+   OutputDebugString(_T("\r\n"));
+
+   // The text was allocated by CreateDebugText() and is owned by us
+   free(pszMessage);
+ }
+
+ #else
+ #define STR(str)
+ #define STR1(fmt, p1)
+ #define STR2(fmt, p1, p2)
+ #define STR3(fmt, p1, p2, p3)
+ #define STR4(fmt, p1, p2, p3, p4)
+ #endif
+
+
+
+ // DBG [simple wrapper around OutputDebugString]
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+ #define DBGT(str) OutputDebugString(_T(str)_T("\r\n"))
+ #define DBG(str) { OutputDebugString(str); OutputDebugString(_T("\r\n")); }
+ #else
+ #define DBGT(str)
+ #define DBG(str)
+ #endif
+
+
+
+ // LOG [append line to debug log file]
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+
+ // Functions:
+ #define LOG(str) LOG_Helper(_T(__FILE__), __LINE__, _T("%s"), _T(str))
+ #define LOG1(fmt, p1) LOG_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), NULL, NULL, NULL)
+ #define LOG2(fmt, p1, p2) LOG_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), NULL, NULL)
+ #define LOG3(fmt, p1, p2, p3) LOG_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), (p3), NULL)
+ #define LOG4(fmt, p1, p2, p3, p4) LOG_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), (p3), (p4))
+ // ---
+
+ // Formats a debug message via CreateDebugText() and appends it, preceded by
+ // a CR/LF pair, to "Debug.log" in the directory of the running executable.
+ // The first successful call in a process starts writing at the beginning of
+ // the file and truncates whatever follows, so each run begins a fresh log;
+ // later calls append at the end. If the log file cannot be opened a BOX
+ // message is shown and nothing is written.
+ //
+ //   pszFile  source file of the call site
+ //   uLine    source line of the call site
+ //   pszFmt   printf-style format, followed by its arguments
+ __inline void LOG_Helper(const TCHAR* pszFile, unsigned int uLine, const TCHAR* pszFmt, ...)
+ {
+   static BOOL bFirstCall = TRUE;
+
+   va_list va;
+   DWORD dwDebugFileWritten = 0;
+   TCHAR* pszText;
+   TCHAR szDebugLogFile[MAX_PATH];
+   HANDLE hDebugLogFile;
+
+   va_start(va, pszFmt);
+   pszText = CreateDebugText(pszFmt, pszFile, uLine, va);
+   va_end(va);
+
+   if (!pszText)
+     return;
+
+   // Filename. GetModuleFileName takes the buffer size in characters, not in
+   // bytes, and may leave the buffer unterminated when the path is truncated.
+   szDebugLogFile[0] = 0;
+   GetModuleFileName(NULL, szDebugLogFile, (DWORD)(sizeof(szDebugLogFile)/sizeof(szDebugLogFile[0]))-1);
+   szDebugLogFile[sizeof(szDebugLogFile)/sizeof(szDebugLogFile[0])-1] = 0;
+   PathRemoveFileSpec(szDebugLogFile);
+   PathAppend(szDebugLogFile, _T("Debug.log"));
+
+   hDebugLogFile = CreateFile(szDebugLogFile, GENERIC_WRITE, FILE_SHARE_READ, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+   if (hDebugLogFile == INVALID_HANDLE_VALUE)
+   {
+     // Nothing can be written without a valid handle; bFirstCall stays TRUE
+     // so that the next successful call still starts a fresh log
+     BOX("LOG ERROR: INVALID_HANDLE_VALUE");
+     free(pszText);
+     return;
+   }
+
+   SetFilePointer(hDebugLogFile, 0, NULL, bFirstCall?FILE_BEGIN:FILE_END);
+   // The separator must be a TCHAR literal to match the byte count written
+   WriteFile(hDebugLogFile, _T("\r\n"), 2*sizeof(TCHAR), &dwDebugFileWritten, NULL);
+   WriteFile(hDebugLogFile, pszText, (DWORD)(lstrlen(pszText)*sizeof(TCHAR)), &dwDebugFileWritten, NULL);
+   SetEndOfFile(hDebugLogFile);
+
+   CloseHandle(hDebugLogFile);
+   free(pszText);
+
+   bFirstCall = FALSE;
+ }
+
+ #else
+ #define LOG(str)
+ #define LOG1(fmt, p1)
+ #define LOG2(fmt, p1, p2)
+ #define LOG3(fmt, p1, p2, p3)
+ #define LOG4(fmt, p1, p2, p3, p4)
+ #endif
+
+
+
+ // POP [show debug popup (Popup Plugin) with text]
+ /*
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+
+ // Functions:
+ #define POP(str) POP_Helper(_T(__FILE__), __LINE__, _T("%s"), _T(str))
+ #define POP1(fmt, p1) POP_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), NULL, NULL, NULL)
+ #define POP2(fmt, p1, p2) POP_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), NULL, NULL)
+ #define POP3(fmt, p1, p2, p3) POP_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), (p3), NULL)
+ #define POP4(fmt, p1, p2, p3, p4) POP_Helper(_T(__FILE__), __LINE__, _T(fmt), (p1), (p2), (p3), (p4))
+ // ---
+
+ __inline void POP_Helper(const TCHAR* pszFile, unsigned int uLine, const TCHAR* pszFmt, ...)
+ {
+ #ifdef MS_POPUP_SHOWMESSAGE
+ va_list va;
+ TCHAR* pszText;
+
+ va_start(va, pszFmt);
+ pszText = CreateDebugText(pszFmt, pszFile, uLine, va);
+ va_end(va);
+
+ // Only show if CTRL is not pressed
+ if (pszText)
+ {
+ if (!(GetAsyncKeyState(VK_CONTROL)&0x8000))
+ CallServiceSync(MS_POPUP_SHOWMESSAGE, (WPARAM)pszText, (LPARAM)SM_NOTIFY);
+
+ free(pszText);
+ }
+ #endif
+ }
+ #else
+ #define POP(str)
+ #define POP1(fmt, p1)
+ #define POP2(fmt, p1, p2)
+ #define POP3(fmt, p1, p2, p3)
+ #define POP4(fmt, p1, p2, p3, p4)
+ #endif
+ */
+
+
+ // LOOPPROT [prevent a loop (for/while/repeat) from becoming an infinite loop]
+ #if !defined(DEACTIVATE_DEBUG_HELPER)
+
+ // LOOPROT(0) breaks the loop when "Esc" is pressed
+
+ // Place LOOPPROT(max) directly inside the body of the loop to protect.
+ // The macro deliberately expands to a plain { } block instead of the usual
+ // do { } while (0) wrapper: its "break" statements have to leave the
+ // enclosing loop of the caller.
+ // Each pass beeps and bumps a counter that is static per expansion site.
+ // The loop is left when "Esc" is held down, or - if max is not 0 - once the
+ // counter exceeds max; in the latter case a BOX message is shown and the
+ // counter is reset before leaving the loop.
+ #define LOOPPROT(max) { \
+   static unsigned int uDebugLoopCount = 1; \
+   uDebugLoopCount++; \
+   if (GetAsyncKeyState(VK_ESCAPE)&0x8000) break; \
+   MessageBeep(MB_ICONEXCLAMATION); \
+   if ((max) != 0 && uDebugLoopCount > (unsigned int)(max)) { \
+     BOX1("LOOPPROT:\nThe execution of the loop was stopped because it looped %u times.", (max)); \
+     uDebugLoopCount = 1; \
+     break; \
+   } \
+ }
+
+ #else
+ #define LOOPPROT(max)
+ #endif
+
+#endif
diff --git a/spamfilter/Utilities/PCRE/bin/pcre.dll b/spamfilter/Utilities/PCRE/bin/pcre.dll
new file mode 100644
index 0000000..54f5ae7
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/bin/pcre.dll
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/bin/pcreposix.dll b/spamfilter/Utilities/PCRE/bin/pcreposix.dll
new file mode 100644
index 0000000..06881cf
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/bin/pcreposix.dll
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/include/pcre.h b/spamfilter/Utilities/PCRE/include/pcre.h
new file mode 100644
index 0000000..aa37389
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/include/pcre.h
@@ -0,0 +1,239 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* In its original form, this is the .in file that is transformed by
+"configure" into pcre.h.
+
+ Copyright (c) 1997-2004 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef _PCRE_H
+#define _PCRE_H
+
+/* The file pcre.h is built by "configure". Do not edit it; instead
+make changes to pcre.in. */
+
+#define PCRE_MAJOR 5
+#define PCRE_MINOR 0
+#define PCRE_DATE 13-Sep-2004
+
+/* Win32 uses DLL by default */
+
+#ifdef _WIN32
+# ifdef PCRE_DEFINITION
+# ifdef DLL_EXPORT
+# define PCRE_DATA_SCOPE __declspec(dllexport)
+# endif
+# else
+# ifndef PCRE_STATIC
+# define PCRE_DATA_SCOPE extern __declspec(dllimport)
+# endif
+# endif
+#endif
+#ifndef PCRE_DATA_SCOPE
+# define PCRE_DATA_SCOPE extern
+#endif
+
+/* Have to include stdlib.h in order to ensure that size_t is defined;
+it is needed here for malloc. */
+
+#include <stdlib.h>
+
+/* Allow for C++ users */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Options */
+
+#define PCRE_CASELESS 0x0001
+#define PCRE_MULTILINE 0x0002
+#define PCRE_DOTALL 0x0004
+#define PCRE_EXTENDED 0x0008
+#define PCRE_ANCHORED 0x0010
+#define PCRE_DOLLAR_ENDONLY 0x0020
+#define PCRE_EXTRA 0x0040
+#define PCRE_NOTBOL 0x0080
+#define PCRE_NOTEOL 0x0100
+#define PCRE_UNGREEDY 0x0200
+#define PCRE_NOTEMPTY 0x0400
+#define PCRE_UTF8 0x0800
+#define PCRE_NO_AUTO_CAPTURE 0x1000
+#define PCRE_NO_UTF8_CHECK 0x2000
+#define PCRE_AUTO_CALLOUT 0x4000
+#define PCRE_PARTIAL 0x8000
+
+/* Exec-time and get/set-time error codes */
+
+#define PCRE_ERROR_NOMATCH (-1)
+#define PCRE_ERROR_NULL (-2)
+#define PCRE_ERROR_BADOPTION (-3)
+#define PCRE_ERROR_BADMAGIC (-4)
+#define PCRE_ERROR_UNKNOWN_NODE (-5)
+#define PCRE_ERROR_NOMEMORY (-6)
+#define PCRE_ERROR_NOSUBSTRING (-7)
+#define PCRE_ERROR_MATCHLIMIT (-8)
+#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
+#define PCRE_ERROR_BADUTF8 (-10)
+#define PCRE_ERROR_BADUTF8_OFFSET (-11)
+#define PCRE_ERROR_PARTIAL (-12)
+#define PCRE_ERROR_BADPARTIAL (-13)
+#define PCRE_ERROR_INTERNAL (-14)
+#define PCRE_ERROR_BADCOUNT (-15)
+
+/* Request types for pcre_fullinfo() */
+
+#define PCRE_INFO_OPTIONS 0
+#define PCRE_INFO_SIZE 1
+#define PCRE_INFO_CAPTURECOUNT 2
+#define PCRE_INFO_BACKREFMAX 3
+#define PCRE_INFO_FIRSTBYTE 4
+#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
+#define PCRE_INFO_FIRSTTABLE 5
+#define PCRE_INFO_LASTLITERAL 6
+#define PCRE_INFO_NAMEENTRYSIZE 7
+#define PCRE_INFO_NAMECOUNT 8
+#define PCRE_INFO_NAMETABLE 9
+#define PCRE_INFO_STUDYSIZE 10
+#define PCRE_INFO_DEFAULT_TABLES 11
+
+/* Request types for pcre_config() */
+
+#define PCRE_CONFIG_UTF8 0
+#define PCRE_CONFIG_NEWLINE 1
+#define PCRE_CONFIG_LINK_SIZE 2
+#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
+#define PCRE_CONFIG_MATCH_LIMIT 4
+#define PCRE_CONFIG_STACKRECURSE 5
+#define PCRE_CONFIG_UNICODE_PROPERTIES 6
+
+/* Bit flags for the pcre_extra structure */
+
+#define PCRE_EXTRA_STUDY_DATA 0x0001
+#define PCRE_EXTRA_MATCH_LIMIT 0x0002
+#define PCRE_EXTRA_CALLOUT_DATA 0x0004
+#define PCRE_EXTRA_TABLES 0x0008
+
+/* Types */
+
+struct real_pcre; /* declaration; the definition is private */
+typedef struct real_pcre pcre;
+
+/* The structure for passing additional data to pcre_exec(). This is defined in
+such a way as to be extensible. Always add new fields at the end, in order to
+remain compatible. */
+
+typedef struct pcre_extra {
+ unsigned long int flags; /* Bits for which fields are set */
+ void *study_data; /* Opaque data from pcre_study() */
+ unsigned long int match_limit; /* Maximum number of calls to match() */
+ void *callout_data; /* Data passed back in callouts */
+ const unsigned char *tables; /* Pointer to character tables */
+} pcre_extra;
+
+/* The structure for passing out data via the pcre_callout_function. We use a
+structure so that new fields can be added on the end in future versions,
+without changing the API of the function, thereby allowing old clients to work
+without modification. */
+
+typedef struct pcre_callout_block {
+ int version; /* Identifies version of block */
+ /* ------------------------ Version 0 ------------------------------- */
+ int callout_number; /* Number compiled into pattern */
+ int *offset_vector; /* The offset vector */
+ const char *subject; /* The subject being matched */
+ int subject_length; /* The length of the subject */
+ int start_match; /* Offset to start of this match attempt */
+ int current_position; /* Where we currently are in the subject */
+ int capture_top; /* Max current capture */
+ int capture_last; /* Most recently closed capture */
+ void *callout_data; /* Data passed in with the call */
+ /* ------------------- Added for Version 1 -------------------------- */
+ int pattern_position; /* Offset to next item in the pattern */
+ int next_item_length; /* Length of next item in the pattern */
+ /* ------------------------------------------------------------------ */
+} pcre_callout_block;
+
+/* Indirection for store get and free functions. These can be set to
+alternative malloc/free functions if required. Special ones are used in the
+non-recursive case for "frames". There is also an optional callout function
+that is triggered by the (?) regex item. Some magic is required for Win32 DLL;
+it is null on other OS. For Virtual Pascal, these have to be different again.
+*/
+
+#ifndef VPCOMPAT
+PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
+PCRE_DATA_SCOPE void (*pcre_free)(void *);
+PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
+PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
+PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
+#else /* VPCOMPAT */
+extern void *pcre_malloc(size_t);
+extern void pcre_free(void *);
+extern void *pcre_stack_malloc(size_t);
+extern void pcre_stack_free(void *);
+extern int pcre_callout(pcre_callout_block *);
+#endif /* VPCOMPAT */
+
+/* Exported PCRE functions */
+
+extern pcre *pcre_compile(const char *, int, const char **,
+ int *, const unsigned char *);
+extern int pcre_config(int, void *);
+extern int pcre_copy_named_substring(const pcre *, const char *,
+ int *, int, const char *, char *, int);
+extern int pcre_copy_substring(const char *, int *, int, int,
+ char *, int);
+extern int pcre_exec(const pcre *, const pcre_extra *,
+ const char *, int, int, int, int *, int);
+extern void pcre_free_substring(const char *);
+extern void pcre_free_substring_list(const char **);
+extern int pcre_fullinfo(const pcre *, const pcre_extra *, int,
+ void *);
+extern int pcre_get_named_substring(const pcre *, const char *,
+ int *, int, const char *, const char **);
+extern int pcre_get_stringnumber(const pcre *, const char *);
+extern int pcre_get_substring(const char *, int *, int, int,
+ const char **);
+extern int pcre_get_substring_list(const char *, int *, int,
+ const char ***);
+extern int pcre_info(const pcre *, int *, int *);
+extern const unsigned char *pcre_maketables(void);
+extern pcre_extra *pcre_study(const pcre *, int, const char **);
+extern const char *pcre_version(void);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* End of pcre.h */
diff --git a/spamfilter/Utilities/PCRE/include/pcreposix.h b/spamfilter/Utilities/PCRE/include/pcreposix.h
new file mode 100644
index 0000000..a8056bd
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/include/pcreposix.h
@@ -0,0 +1,117 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+#ifndef _PCREPOSIX_H
+#define _PCREPOSIX_H
+
+/* This is the header for the POSIX wrapper interface to the PCRE Perl-
+Compatible Regular Expression library. It defines the things POSIX says should
+be there. I hope.
+
+ Copyright (c) 1997-2004 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Have to include stdlib.h in order to ensure that size_t is defined. */
+
+#include <stdlib.h>
+
+/* Allow for C++ users */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Options defined by POSIX. */
+
+#define REG_ICASE 0x01
+#define REG_NEWLINE 0x02
+#define REG_NOTBOL 0x04
+#define REG_NOTEOL 0x08
+
+/* These are not used by PCRE, but by defining them we make it easier
+to slot PCRE into existing programs that make POSIX calls. */
+
+#define REG_EXTENDED 0
+#define REG_NOSUB 0
+
+/* Error values. Not all these are relevant or used by the wrapper. */
+
+enum {
+ REG_ASSERT = 1, /* internal error ? */
+ REG_BADBR, /* invalid repeat counts in {} */
+ REG_BADPAT, /* pattern error */
+ REG_BADRPT, /* ? * + invalid */
+ REG_EBRACE, /* unbalanced {} */
+ REG_EBRACK, /* unbalanced [] */
+ REG_ECOLLATE, /* collation error - not relevant */
+ REG_ECTYPE, /* bad class */
+ REG_EESCAPE, /* bad escape sequence */
+ REG_EMPTY, /* empty expression */
+ REG_EPAREN, /* unbalanced () */
+ REG_ERANGE, /* bad range inside [] */
+ REG_ESIZE, /* expression too big */
+ REG_ESPACE, /* failed to get memory */
+ REG_ESUBREG, /* bad back reference */
+ REG_INVARG, /* bad argument */
+ REG_NOMATCH /* match failed */
+};
+
+
+/* The structure representing a compiled regular expression. */
+
+typedef struct {
+ void *re_pcre;
+ size_t re_nsub;
+ size_t re_erroffset;
+} regex_t;
+
+/* The structure in which a captured offset is returned. */
+
+typedef int regoff_t;
+
+typedef struct {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+} regmatch_t;
+
+/* The functions */
+
+extern int regcomp(regex_t *, const char *, int);
+extern int regexec(const regex_t *, const char *, size_t, regmatch_t *, int);
+extern size_t regerror(int, const regex_t *, char *, size_t);
+extern void regfree(regex_t *);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* End of pcreposix.h */
diff --git a/spamfilter/Utilities/PCRE/lib/libpcre-bcc.lib b/spamfilter/Utilities/PCRE/lib/libpcre-bcc.lib
new file mode 100644
index 0000000..706c7af
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcre-bcc.lib
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcre.def b/spamfilter/Utilities/PCRE/lib/libpcre.def
new file mode 100644
index 0000000..a7bdcbf
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcre.def
@@ -0,0 +1,26 @@
+EXPORTS
+ pcre_callout @1 DATA
+ pcre_compile @2
+ pcre_config @3
+ pcre_copy_named_substring @4
+ pcre_copy_substring @5
+ pcre_exec @6
+ pcre_free @7 DATA
+ pcre_free_substring @8
+ pcre_free_substring_list @9
+ pcre_fullinfo @10
+ pcre_get_named_substring @11
+ pcre_get_stringnumber @12
+ pcre_get_substring @13
+ pcre_get_substring_list @14
+ pcre_info @15
+ pcre_maketables @16
+ pcre_malloc @17 DATA
+ pcre_stack_free @18 DATA
+ pcre_stack_malloc @19 DATA
+ pcre_study @20
+ pcre_version @21
+ regcomp @22
+ regerror @23
+ regexec @24
+ regfree @25
diff --git a/spamfilter/Utilities/PCRE/lib/libpcre.dll.a b/spamfilter/Utilities/PCRE/lib/libpcre.dll.a
new file mode 100644
index 0000000..2191488
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcre.dll.a
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcre.lib b/spamfilter/Utilities/PCRE/lib/libpcre.lib
new file mode 100644
index 0000000..71c1835
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcre.lib
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcreposix-bcc.lib b/spamfilter/Utilities/PCRE/lib/libpcreposix-bcc.lib
new file mode 100644
index 0000000..b532b5b
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcreposix-bcc.lib
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcreposix.def b/spamfilter/Utilities/PCRE/lib/libpcreposix.def
new file mode 100644
index 0000000..8dca4c8
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcreposix.def
@@ -0,0 +1,16 @@
+EXPORTS
+ pcre_callout @1 DATA
+ pcre_compile @2
+ pcre_config @3
+ pcre_exec @4
+ pcre_free @5 DATA
+ pcre_fullinfo @6
+ pcre_info @7
+ pcre_malloc @8 DATA
+ pcre_stack_free @9 DATA
+ pcre_stack_malloc @10 DATA
+ pcre_version @11
+ regcomp @12
+ regerror @13
+ regexec @14
+ regfree @15
diff --git a/spamfilter/Utilities/PCRE/lib/libpcreposix.dll.a b/spamfilter/Utilities/PCRE/lib/libpcreposix.dll.a
new file mode 100644
index 0000000..1208b41
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcreposix.dll.a
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcreposix.lib b/spamfilter/Utilities/PCRE/lib/libpcreposix.lib
new file mode 100644
index 0000000..a4bfe43
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcreposix.lib
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre.3.html b/spamfilter/Utilities/PCRE/man/html/pcre.3.html
new file mode 100644
index 0000000..93f32fa
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre.3.html
@@ -0,0 +1,174 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Introduction</a></h2>
+ <p>
+The PCRE library
+is a set of functions that implement regular expression pattern matching
+using the same syntax and semantics as Perl, with just a few differences.
+The current implementation of PCRE (release 5.x) corresponds approximately
+with Perl 5.8, including support for UTF-8 encoded strings and Unicode general
+category properties. However, this support has to be explicitly enabled;
+it is not the default. <p>
+PCRE is written in C and released as a C library.
+A number of people have written wrappers and interfaces of various kinds.
+A C++ class is included in these contributions, which can be found in the
+<i>Contrib</i> directory at the primary FTP site, which is: <p>
+ ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
+<p>
+Details of exactly which Perl regular expression features are and are not
+supported by PCRE are given in separate documents. See the <b>pcrepattern</b>
+ and <b>pcrecompat</b> pages. <p>
+Some features of PCRE can be included, excluded,
+or changed when the library is built. The <b>pcre_config()</b> function makes
+it possible for a client to discover which features are available. The features
+themselves are described in the <b>pcrebuild</b> page. Documentation about building
+PCRE for various operating systems can be found in the <b>README</b> file in the
+source distribution.
+<h2><a name='sect2' href='#toc2'>User Documentation</a></h2>
+ <p>
+The user documentation for PCRE
+comprises a number of different sections. In the "man" format, each of these
+is a separate "man page". In the HTML format, each is a separate page, linked
+from the index page. In the plain text format, all the sections are concatenated,
+for ease of searching. The sections are as follows: <p>
+ pcre
+this document<br>
+ pcreapi details of PCRE&rsquo;s native API<br>
+ pcrebuild options for building PCRE<br>
+ pcrecallout details of the callout feature<br>
+ pcrecompat discussion of Perl compatibility<br>
+ pcregrep description of the <b>pcregrep</b> command<br>
+ pcrepartial details of the partial matching facility<br>
+ pcrepattern syntax and semantics of supported<br>
+ regular expressions<br>
+ pcreperform discussion of performance issues<br>
+ pcreposix the POSIX-compatible API<br>
+ pcreprecompile details of saving and re-using precompiled patterns<br>
+ pcresample discussion of the sample program<br>
+ pcretest description of the <b>pcretest</b> testing command<br>
+ <p>
+In addition, in the "man" and HTML formats, there is a short page for
+each library function, listing its arguments and results.
+<h2><a name='sect3' href='#toc3'>Limitations</a></h2>
+
+<p>
+There are some size limitations in PCRE but it is hoped that they will
+never in practice be relevant. <p>
+The maximum length of a compiled pattern
+is 65539 (sic) bytes if PCRE is compiled with the default internal linkage
+size of 2. If you want to process regular expressions that are truly enormous,
+you can compile PCRE with an internal linkage size of 3 or 4 (see the <b>README</b>
+file in the source distribution and the <b>pcrebuild</b> documentation for details).
+In these cases the limit is substantially larger. However, the speed of
+execution will be slower. <p>
+All values in repeating quantifiers must be less
+than 65536. The maximum number of capturing subpatterns is 65535. <p>
+There is
+no limit to the number of non-capturing subpatterns, but the maximum depth
+of nesting of all kinds of parenthesized subpattern, including capturing
+subpatterns, assertions, and other types of subpattern, is 200. <p>
+The maximum
+length of a subject string is the largest positive number that an integer
+variable can hold. However, PCRE uses recursion to handle subpatterns and
+indefinite repetition. This means that the available stack space may limit
+the size of a subject string that can be processed by certain patterns.
+<p>
+
+<h2><a name='sect4' href='#toc4'>Utf-8 and Unicode Property Support</a></h2>
+ <p>
+From release 3.3, PCRE has had some
+support for character strings encoded in the UTF-8 format. For release 4.0
+this was greatly extended to cover most common requirements, and in release
+5.0 additional support for Unicode general category properties was added.
+<p>
+In order to process UTF-8 strings, you must build PCRE to include UTF-8 support
+in the code, and, in addition, you must call <b>pcre_compile()</b> with the
+PCRE_UTF8 option flag. When you do this, both the pattern and any subject
+strings that are matched against it are treated as UTF-8 strings instead
+of just strings of bytes. <p>
+If you compile PCRE with UTF-8 support, but do
+not use it at run time, the library will be a bit bigger, but the additional
+run time overhead is limited to testing the PCRE_UTF8 flag in several places,
+so should not be very large. <p>
+If PCRE is built with Unicode character property
+support (which implies UTF-8 support), the escape sequences \p{..}, \P{..}, and
+\X are supported. The available properties that can be tested are limited
+to the general category properties such as Lu for an upper case letter
+or Nd for a decimal number. A full list is given in the <b>pcrepattern</b> documentation.
+The PCRE library is increased in size by about 90K when Unicode property
+support is included. <p>
+The following comments apply when PCRE is running in
+UTF-8 mode: <p>
+1. When you set the PCRE_UTF8 flag, the strings passed as patterns
+and subjects are checked for validity on entry to the relevant functions.
+If an invalid UTF-8 string is passed, an error return is given. In some situations,
+you may already know that your strings are valid, and therefore want to
+skip these checks in order to improve performance. If you set the PCRE_NO_UTF8_CHECK
+flag at compile time or at run time, PCRE assumes that the pattern or subject
+it is given (respectively) contains only valid UTF-8 codes. In this case,
+it does not diagnose an invalid UTF-8 string. If you pass an invalid UTF-8
+string to PCRE when PCRE_NO_UTF8_CHECK is set, the results are undefined.
+Your program may crash. <p>
+2. In a pattern, the escape sequence \x{...}, where the
+contents of the braces is a string of hexadecimal digits, is interpreted
+as a UTF-8 character whose code number is the given hexadecimal number,
+for example: \x{1234}. If a non-hexadecimal digit appears between the braces,
+the item is not recognized. This escape sequence can be used either as a
+literal, or within a character class. <p>
+3. The original hexadecimal escape
+sequence, \xhh, matches a two-byte UTF-8 character if the value is greater
+than 127. <p>
+4. Repeat quantifiers apply to complete UTF-8 characters, not to
+individual bytes, for example: \x{100}{3}. <p>
+5. The dot metacharacter matches
+one UTF-8 character instead of a single byte. <p>
+6. The escape sequence \C can
+be used to match a single byte in UTF-8 mode, but its use can lead to some
+strange effects. <p>
+7. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
+test characters of any code value, but the characters that PCRE recognizes
+as digits, spaces, or word characters remain the same set as before, all
+with values less than 256. This remains true even when PCRE includes Unicode
+property support, because to do otherwise would slow down PCRE in many
+common cases. If you really want to test for a wider sense of, say, "digit",
+you must use Unicode property tests such as \p{Nd}. <p>
+8. Similarly, characters
+that match the POSIX named character classes are all low-valued characters.
+<p>
+9. Case-insensitive matching applies only to characters whose values are
+less than 128, unless PCRE is built with Unicode property support. Even
+when Unicode property support is available, PCRE still uses its own character
+tables when checking the case of low-valued characters, so as not to degrade
+performance. The Unicode property information is used only for characters
+with higher values.
+<h2><a name='sect5' href='#toc5'>Author</a></h2>
+ <p>
+Philip Hazel &lt;ph10@cam.ac.uk&gt; <br>
+University Computing Service, <br>
+Cambridge CB2 3QG, England. <br>
+Phone: +44 1223 334714 <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Introduction</a></li>
+<li><a name='toc2' href='#sect2'>User Documentation</a></li>
+<li><a name='toc3' href='#sect3'>Limitations</a></li>
+<li><a name='toc4' href='#sect4'>Utf-8 and Unicode Property Support</a></li>
+<li><a name='toc5' href='#sect5'>Author</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_compile.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_compile.3.html
new file mode 100644
index 0000000..bd9272c
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_compile.3.html
@@ -0,0 +1,67 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>,
+int *<i>erroffset</i>,</b> <b>const unsigned char *<i>tableptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function
+compiles a regular expression into an internal form. Its arguments are:
+<p>
+ <i>pattern</i> A zero-terminated string containing the<br>
+ regular expression to be compiled<br>
+ <i>options</i> Zero or more option bits<br>
+ <i>errptr</i> Where to put an error message<br>
+ <i>erroffset</i> Offset in pattern where error was found<br>
+ <i>tableptr</i> Pointer to character tables, or NULL to<br>
+ use the built-in default<br>
+ <p>
+The option bits are: <p>
+ PCRE_ANCHORED Force pattern anchoring<br>
+ PCRE_AUTO_CALLOUT Compile automatic callouts<br>
+ PCRE_CASELESS Do caseless matching<br>
+ PCRE_DOLLAR_ENDONLY $ not to match newline at end<br>
+ PCRE_DOTALL . matches anything including NL<br>
+ PCRE_EXTENDED Ignore whitespace and # comments<br>
+ PCRE_EXTRA PCRE extra features<br>
+ (not much use currently)<br>
+ PCRE_MULTILINE ^ and $ match newlines within data<br>
+ PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-<br>
+ theses (named ones available)<br>
+ PCRE_UNGREEDY Invert greediness of quantifiers<br>
+ PCRE_UTF8 Run in UTF-8 mode<br>
+ PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8<br>
+ validity (only relevant if<br>
+ PCRE_UTF8 is set)<br>
+ <p>
+PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and PCRE_NO_UTF8_CHECK.
+<p>
+The yield of the function is a pointer to a private data structure that
+contains the compiled pattern, or NULL if an error was detected. <p>
+There is
+a complete description of the PCRE native API in the <b>pcreapi</b> page and
+a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_config.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_config.3.html
new file mode 100644
index 0000000..edf2450
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_config.3.html
@@ -0,0 +1,56 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function makes
+it possible for a client program to find out which optional features are
+available in the version of the PCRE library it is using. Its arguments
+are as follows: <p>
+ <i>what</i> A code specifying what information is required<br>
+ <i>where</i> Points to where to put the data<br>
+ <p>
+The available codes are: <p>
+ PCRE_CONFIG_LINK_SIZE Internal link size:
+2, 3, or 4<br>
+ PCRE_CONFIG_MATCH_LIMIT Internal resource limit<br>
+ PCRE_CONFIG_NEWLINE Value of the newline character<br>
+ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD<br>
+ Threshold of return slots, above<br>
+ which <b>malloc()</b> is used by<br>
+ the POSIX API<br>
+ PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)<br>
+ PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no)<br>
+ PCRE_CONFIG_UNICODE_PROPERTIES<br>
+ Availability of Unicode property support<br>
+ (1=yes 0=no)<br>
+ <p>
+The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. <p>
+There
+is a complete description of the PCRE native API in the <b>pcreapi</b> page
+and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_copy_named_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_copy_named_substring.3.html
new file mode 100644
index 0000000..09c341f
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_copy_named_substring.3.html
@@ -0,0 +1,48 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>char *<i>buffer</i>, int
+<i>buffersize</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a convenience function for extracting
+a captured substring, identified by name, into a given buffer. The arguments
+are: <p>
+ <i>code</i> Pattern that was successfully matched<br>
+ <i>subject</i> Subject that has been successfully matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>stringname</i> Name of the required substring<br>
+ <i>buffer</i> Buffer to receive the string<br>
+ <i>buffersize</i> Size of buffer<br>
+ <p>
+The yield is the length of the substring, PCRE_ERROR_NOMEMORY if the buffer
+was too small, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
+<p>
+There is a complete description of the PCRE native API in the <b>pcreapi</b>
+ page and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_copy_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_copy_substring.3.html
new file mode 100644
index 0000000..c706691
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_copy_substring.3.html
@@ -0,0 +1,47 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>, char *<i>buffer</i>,</b> <b>int <i>buffersize</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is
+a convenience function for extracting a captured substring into a given
+buffer. The arguments are: <p>
+ <i>subject</i> Subject that has been successfully
+matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>stringnumber</i> Number of the required substring<br>
+ <i>buffer</i> Buffer to receive the string<br>
+ <i>buffersize</i> Size of buffer<br>
+ <p>
+The yield is the length of the string, PCRE_ERROR_NOMEMORY if the buffer
+was too small, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
+<p>
+There is a complete description of the PCRE native API in the <b>pcreapi</b>
+ page and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_exec.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_exec.3.html
new file mode 100644
index 0000000..c985429
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_exec.3.html
@@ -0,0 +1,71 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>const char
+*<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b> <b>int <i>options</i>, int *<i>ovector</i>, int
+<i>ovecsize</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function matches a compiled regular expression
+against a given subject string, and returns offsets to capturing subexpressions.
+Its arguments are: <p>
+ <i>code</i> Points to the compiled pattern<br>
+ <i>extra</i> Points to an associated <b>pcre_extra</b> structure,<br>
+ or is NULL<br>
+ <i>subject</i> Points to the subject string<br>
+ <i>length</i> Length of the subject string, in bytes<br>
+ <i>startoffset</i> Offset in bytes in the subject at which to<br>
+ start matching<br>
+ <i>options</i> Option bits<br>
+ <i>ovector</i> Points to a vector of ints for result offsets<br>
+ <i>ovecsize</i> Number of elements in the vector (a multiple of 3)<br>
+ <p>
+The options are: <p>
+ PCRE_ANCHORED Match only at the first position<br>
+ PCRE_NOTBOL Subject is not the beginning of a line<br>
+ PCRE_NOTEOL Subject is not the end of a line<br>
+ PCRE_NOTEMPTY An empty string is not a valid match<br>
+ PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8<br>
+ validity (only relevant if PCRE_UTF8<br>
+ was set at compile time)<br>
+ PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match<br>
+ <p>
+There are restrictions on what may appear in a pattern when partial matching
+is requested. <p>
+A <b>pcre_extra</b> structure contains the following fields: <p>
+ <i>flags</i>
+ Bits indicating which fields are set<br>
+ <i>study_data</i> Opaque data from <b>pcre_study()</b><br>
+ <i>match_limit</i> Limit on internal recursion<br>
+ <i>callout_data</i> Opaque data passed back to callouts<br>
+ <i>tables</i> Points to character tables or is NULL<br>
+ <p>
+The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, PCRE_EXTRA_CALLOUT_DATA,
+and PCRE_EXTRA_TABLES. <p>
+There is a complete description of the PCRE native
+API in the <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b>
+ page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_free_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_free_substring.3.html
new file mode 100644
index 0000000..60a97b6
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_free_substring.3.html
@@ -0,0 +1,37 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a
+convenience function for freeing the store obtained by a previous call
+to <b>pcre_get_substring()</b> or <b>pcre_get_named_substring()</b>. Its only argument
+is a pointer to the string. <p>
+There is a complete description of the PCRE
+native API in the <b>pcreapi</b> page and a description of the POSIX API in
+the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_free_substring_list.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_free_substring_list.3.html
new file mode 100644
index 0000000..a66f3cc
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_free_substring_list.3.html
@@ -0,0 +1,37 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This
+is a convenience function for freeing the store obtained by a previous
+call to <b>pcre_get_substring_list()</b>. Its only argument is a pointer to the
+list of string pointers. <p>
+There is a complete description of the PCRE native
+API in the <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b>
+ page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_fullinfo.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_fullinfo.3.html
new file mode 100644
index 0000000..30392fc
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_fullinfo.3.html
@@ -0,0 +1,67 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>int <i>what</i>,
+void *<i>where</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function returns information about a compiled
+pattern. Its arguments are: <p>
+ <i>code</i> Compiled regular
+expression<br>
+ <i>extra</i> Result of <b>pcre_study()</b> or NULL<br>
+ <i>what</i> What information is required<br>
+ <i>where</i> Where to put the information<br>
+ <p>
+The following information is available: <p>
+ PCRE_INFO_BACKREFMAX Number
+of highest back reference<br>
+ PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns<br>
+ PCRE_INFO_DEFAULT_TABLES Pointer to default tables<br>
+ PCRE_INFO_FIRSTBYTE Fixed first byte for a match, or<br>
+ -1 for start of string<br>
+ or after newline, or<br>
+ -2 otherwise<br>
+ PCRE_INFO_FIRSTTABLE Table of first bytes<br>
+ (after studying)<br>
+ PCRE_INFO_LASTLITERAL Literal last byte required<br>
+ PCRE_INFO_NAMECOUNT Number of named subpatterns<br>
+ PCRE_INFO_NAMEENTRYSIZE Size of name table entry<br>
+ PCRE_INFO_NAMETABLE Pointer to name table<br>
+ PCRE_INFO_OPTIONS Options used for compilation<br>
+ PCRE_INFO_SIZE Size of compiled pattern<br>
+ PCRE_INFO_STUDYSIZE Size of study data<br>
+ <p>
+The yield of the function is zero on success or: <p>
+ PCRE_ERROR_NULL
+ the argument <i>code</i> was NULL<br>
+ the argument <i>where</i> was NULL<br>
+ PCRE_ERROR_BADMAGIC the "magic number" was not found<br>
+ PCRE_ERROR_BADOPTION the value of <i>what</i> was invalid<br>
+ <p>
+There is a complete description of the PCRE native API in the <b>pcreapi</b>
+ page and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_get_named_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_get_named_substring.3.html
new file mode 100644
index 0000000..14f1c49
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_get_named_substring.3.html
@@ -0,0 +1,48 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>const char **<i>stringptr</i>);</b>
+
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a convenience function for extracting a captured substring
+by name. The arguments are: <p>
+ <i>code</i> Compiled pattern<br>
+ <i>subject</i> Subject that has been successfully matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>stringname</i> Name of the required substring<br>
+ <i>stringptr</i> Where to put the string pointer<br>
+ <p>
+The memory in which the substring is placed is obtained by calling <b>pcre_malloc()</b>.
+The yield of the function is the length of the extracted substring, PCRE_ERROR_NOMEMORY
+if sufficient memory could not be obtained, or PCRE_ERROR_NOSUBSTRING if
+the string name is invalid. <p>
+There is a complete description of the PCRE
+native API in the <b>pcreapi</b> page and a description of the POSIX API in
+the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_get_stringnumber.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_get_stringnumber.3.html
new file mode 100644
index 0000000..1ca280c
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_get_stringnumber.3.html
@@ -0,0 +1,42 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b> <b>const char *<i>name</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+
+<p>
+This convenience function finds the number of a named substring capturing
+parenthesis in a compiled pattern. Its arguments are: <p>
+ <i>code</i> Compiled
+regular expression<br>
+ <i>name</i> Name whose number is required<br>
+ <p>
+The yield of the function is the number of the parenthesis if the name
+is found, or PCRE_ERROR_NOSUBSTRING otherwise. <p>
+There is a complete description
+of the PCRE native API in the <b>pcreapi</b> page and a description of the POSIX
+API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_get_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_get_substring.3.html
new file mode 100644
index 0000000..566a393
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_get_substring.3.html
@@ -0,0 +1,47 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>,</b> <b>const char **<i>stringptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a convenience
+function for extracting a captured substring. The arguments are: <p>
+ <i>subject</i>
+ Subject that has been successfully matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>stringnumber</i> Number of the required substring<br>
+ <i>stringptr</i> Where to put the string pointer<br>
+ <p>
+The memory in which the substring is placed is obtained by calling <b>pcre_malloc()</b>.
+The yield of the function is the length of the substring, PCRE_ERROR_NOMEMORY
+if sufficient memory could not be obtained, or PCRE_ERROR_NOSUBSTRING if
+the string number is invalid. <p>
+There is a complete description of the PCRE
+native API in the <b>pcreapi</b> page and a description of the POSIX API in
+the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_get_substring_list.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_get_substring_list.3.html
new file mode 100644
index 0000000..f4e5931
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_get_substring_list.3.html
@@ -0,0 +1,47 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_get_substring_list(const char *<i>subject</i>,</b> <b>int *<i>ovector</i>, int <i>stringcount</i>,
+const char ***<i>listptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a convenience function for
+extracting a list of all the captured substrings. The arguments are: <p>
+ <i>subject</i>
+ Subject that has been successfully matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>listptr</i> Where to put a pointer to the list<br>
+ <p>
+The memory in which the substrings and the list are placed is obtained
+by calling <b>pcre_malloc()</b>. A pointer to a list of pointers is put in the
+variable whose address is in <i>listptr</i>. The list is terminated by a NULL pointer.
+The yield of the function is zero on success or PCRE_ERROR_NOMEMORY if
+sufficient memory could not be obtained. <p>
+There is a complete description
+of the PCRE native API in the <b>pcreapi</b> page and a description of the POSIX
+API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_info.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_info.3.html
new file mode 100644
index 0000000..caf66db
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_info.3.html
@@ -0,0 +1,35 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b> <b>*<i>firstcharptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+
+<p>
+This function is obsolete. You should be using <b>pcre_fullinfo()</b> instead. <p>
+There
+is a complete description of the PCRE native API in the <b>pcreapi</b> page
+and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_maketables.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_maketables.3.html
new file mode 100644
index 0000000..7a73848
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_maketables.3.html
@@ -0,0 +1,39 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>const unsigned char *pcre_maketables(void);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function
+builds a set of character tables for character values less than 256. These
+can be passed to <b>pcre_compile()</b> to override PCRE&rsquo;s internal, built-in tables
+(which were made by <b>pcre_maketables()</b> when PCRE was compiled). You might
+want to do this if you are using a non-standard locale. The function yields
+a pointer to the tables. <p>
+There is a complete description of the PCRE native
+API in the <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b>
+ page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_study.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_study.3.html
new file mode 100644
index 0000000..20a7a67
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_study.3.html
@@ -0,0 +1,49 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>);</b>
+
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function studies a compiled pattern, to see if additional
+information can be extracted that might speed up matching. Its arguments
+are: <p>
+ <i>code</i> A compiled regular expression<br>
+ <i>options</i> Options for <b>pcre_study()</b><br>
+ <i>errptr</i> Where to put an error message<br>
+ <p>
+If the function succeeds, it returns a value that can be passed to <b>pcre_exec()</b>
+via its <i>extra</i> argument. <p>
+If the function returns NULL, either it could not
+find any additional information, or there was an error. You can tell the
+difference by looking at the error value. It is NULL in the first case. <p>
+There
+are currently no options defined; the value of the second argument should
+always be zero. <p>
+There is a complete description of the PCRE native API in
+the <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b>
+ page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_version.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_version.3.html
new file mode 100644
index 0000000..fe50a95
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_version.3.html
@@ -0,0 +1,36 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>char *pcre_version(void);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function returns a character
+string that gives the version number of the PCRE library and the date of
+its release. <p>
+There is a complete description of the PCRE native API in the
+ <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b> page.
+<p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcreapi.3.html b/spamfilter/Utilities/PCRE/man/html/pcreapi.3.html
new file mode 100644
index 0000000..a083204
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcreapi.3.html
@@ -0,0 +1,1069 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Native API</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b>
+<p>
+<font size='-1'></font>
+ <br>
+<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>,
+int *<i>erroffset</i>,</b> <b>const unsigned char *<i>tableptr</i>);</b> <p>
+<br>
+<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>);</b>
+<p>
+<br>
+<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>const char
+*<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b> <b>int <i>options</i>, int *<i>ovector</i>, int
+<i>ovecsize</i>);</b> <p>
+<br>
+<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>char *<i>buffer</i>, int
+<i>buffersize</i>);</b> <p>
+<br>
+<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>, char *<i>buffer</i>,</b> <b>int <i>buffersize</i>);</b> <p>
+<br>
+<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>const char **<i>stringptr</i>);</b>
+<p>
+<br>
+<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b> <b>const char *<i>name</i>);</b> <p>
+<br>
+<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>,</b> <b>const char **<i>stringptr</i>);</b> <p>
+<br>
+<b>int pcre_get_substring_list(const char *<i>subject</i>,</b> <b>int *<i>ovector</i>, int <i>stringcount</i>,
+const char ***<i>listptr</i>);</b> <p>
+<br>
+<b>void pcre_free_substring(const char *<i>stringptr</i>);</b> <p>
+<br>
+<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b> <p>
+<br>
+<b>const unsigned char *pcre_maketables(void);</b> <p>
+<br>
+<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>int <i>what</i>,
+void *<i>where</i>);</b> <p>
+<br>
+<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b> <b>*<i>firstcharptr</i>);</b> <p>
+<br>
+<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b> <p>
+<br>
+<b>char *pcre_version(void);</b> <p>
+<br>
+<b>void *(*pcre_malloc)(size_t);</b> <p>
+<br>
+<b>void (*pcre_free)(void *);</b> <p>
+<br>
+<b>void *(*pcre_stack_malloc)(size_t);</b> <p>
+<br>
+<b>void (*pcre_stack_free)(void *);</b> <p>
+<br>
+<b>int (*pcre_callout)(pcre_callout_block *);</b>
+<h2><a name='sect2' href='#toc2'>Pcre API Overview</a></h2>
+ <p>
+PCRE has
+its own native API, which is described in this document. There is also a
+set of wrapper functions that correspond to the POSIX regular expression
+API. These are described in the <b>pcreposix</b> documentation. <p>
+The native API
+function prototypes are defined in the header file <b>pcre.h</b>, and on Unix systems
+the library itself is called <b>libpcre</b>. It can normally be accessed by adding
+<b>-lpcre</b> to the command for linking an application that uses PCRE. The header
+file defines the macros PCRE_MAJOR and PCRE_MINOR to contain the major
+and minor release numbers for the library. Applications can use these to
+include support for different releases of PCRE. <p>
+The functions <b>pcre_compile()</b>,
+<b>pcre_study()</b>, and <b>pcre_exec()</b> are used for compiling and matching regular
+expressions. A sample program that demonstrates the simplest way of using
+them is provided in the file called <i>pcredemo.c</i> in the source distribution.
+The <b>pcresample</b> documentation describes how to run it. <p>
+In addition to the
+main compiling and matching functions, there are convenience functions
+for extracting captured substrings from a matched subject string. They are:
+<p>
+ <b>pcre_copy_substring()</b><br>
+ <b>pcre_copy_named_substring()</b><br>
+ <b>pcre_get_substring()</b><br>
+ <b>pcre_get_named_substring()</b><br>
+ <b>pcre_get_substring_list()</b><br>
+ <b>pcre_get_stringnumber()</b><br>
+ <p>
+<b>pcre_free_substring()</b> and <b>pcre_free_substring_list()</b> are also provided,
+to free the memory used for extracted strings. <p>
+The function <b>pcre_maketables()</b>
+is used to build a set of character tables in the current locale for passing
+to <b>pcre_compile()</b> or <b>pcre_exec()</b>. This is an optional facility that is provided
+for specialist use. Most commonly, no special tables are passed, in which
+case internal tables that are generated when PCRE is built are used. <p>
+The
+function <b>pcre_fullinfo()</b> is used to find out information about a compiled
+pattern; <b>pcre_info()</b> is an obsolete version that returns only some of the
+available information, but is retained for backwards compatibility. The
+function <b>pcre_version()</b> returns a pointer to a string containing the version
+of PCRE and its date of release. <p>
+The global variables <b>pcre_malloc</b> and <b>pcre_free</b>
+initially contain the entry points of the standard <b>malloc()</b> and <b>free()</b>
+functions, respectively. PCRE calls the memory management functions via
+these variables, so a calling program can replace them if it wishes to
+intercept the calls. This should be done before calling any PCRE functions.
+<p>
+The global variables <b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> are also indirections
+to memory management functions. These special functions are used only when
+PCRE is compiled to use the heap for remembering data, instead of recursive
+function calls. This is a non-standard way of building PCRE, for use in environments
+that have limited stacks. Because of the greater use of memory management,
+it runs more slowly. Separate functions are provided so that special-purpose
+external code can be used for this case. When used, these functions are
+always called in a stack-like manner (last obtained, first freed), and always
+for memory blocks of the same size. <p>
+The global variable <b>pcre_callout</b> initially
+contains NULL. It can be set by the caller to a "callout" function, which
+PCRE will then call at specified points during a matching operation. Details
+are given in the <b>pcrecallout</b> documentation.
+<h2><a name='sect3' href='#toc3'>Multithreading</a></h2>
+ <p>
+The PCRE
+functions can be used in multi-threading applications, with the proviso
+that the memory management functions pointed to by <b>pcre_malloc</b>, <b>pcre_free</b>,
+<b>pcre_stack_malloc</b>, and <b>pcre_stack_free</b>, and the callout function pointed
+to by <b>pcre_callout</b>, are shared by all threads. <p>
+The compiled form of a regular
+expression is not altered during matching, so the same compiled pattern
+can safely be used by several threads at once.
+<h2><a name='sect4' href='#toc4'>Saving Precompiled Patterns
+for Later Use</a></h2>
+ <p>
+The compiled form of a regular expression can be saved and
+re-used at a later time, possibly by a different program, and even on a
+host other than the one on which it was compiled. Details are given in the
+ <b>pcreprecompile</b> documentation.
+<h2><a name='sect5' href='#toc5'>Checking Build-time Options</a></h2>
+ <p>
+<b>int pcre_config(int
+<i>what</i>, void *<i>where</i>);</b> <p>
+The function <b>pcre_config()</b> makes it possible for a
+PCRE client to discover which optional features have been compiled into
+the PCRE library. The <b>pcrebuild</b> documentation has more details about these
+optional features. <p>
+The first argument for <b>pcre_config()</b> is an integer, specifying
+which information is required; the second argument is a pointer to a variable
+into which the information is placed. The following information is available:
+<p>
+ PCRE_CONFIG_UTF8<br>
+ <p>
+The output is an integer that is set to one if UTF-8 support is available;
+otherwise it is set to zero. <p>
+ PCRE_CONFIG_UNICODE_PROPERTIES<br>
+ <p>
+The output is an integer that is set to one if support for Unicode character
+properties is available; otherwise it is set to zero. <p>
+ PCRE_CONFIG_NEWLINE<br>
+ <p>
+The output is an integer that is set to the value of the code that is
+used for the newline character. It is either linefeed (10) or carriage return
+(13), and should normally be the standard character for your operating
+system. <p>
+ PCRE_CONFIG_LINK_SIZE<br>
+ <p>
+The output is an integer that contains the number of bytes used for internal
+linkage in compiled regular expressions. The value is 2, 3, or 4. Larger
+values allow larger regular expressions to be compiled, at the expense
+of slower matching. The default value of 2 is sufficient for all but the
+most massive patterns, since it allows the compiled pattern to be up to
+64K in size. <p>
+ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD<br>
+ <p>
+The output is an integer that contains the threshold above which the POSIX
+interface uses <b>malloc()</b> for output vectors. Further details are given in
+the <b>pcreposix</b> documentation. <p>
+ PCRE_CONFIG_MATCH_LIMIT<br>
+ <p>
+The output is an integer that gives the default limit for the number of
+internal matching function calls in a <b>pcre_exec()</b> execution. Further details
+are given with <b>pcre_exec()</b> below. <p>
+ PCRE_CONFIG_STACKRECURSE<br>
+ <p>
+The output is an integer that is set to one if internal recursion is implemented
+by recursive function calls that use the stack to remember their state.
+This is the usual way that PCRE is compiled. The output is zero if PCRE
+was compiled to use blocks of data on the heap instead of recursive function
+calls. In this case, <b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> are called to
+manage memory blocks on the heap, thus avoiding the use of the stack.
+
+<h2><a name='sect6' href='#toc6'>Compiling a Pattern</a></h2>
+ <p>
+<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
+ <b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b> <b>const unsigned char *<i>tableptr</i>);</b>
+<p>
+The function <b>pcre_compile()</b> is called to compile a pattern into an internal
+form. The pattern is a C string terminated by a binary zero, and is passed
+in the <i>pattern</i> argument. A pointer to a single block of memory that is obtained
+via <b>pcre_malloc</b> is returned. This contains the compiled code and related
+data. The <b>pcre</b> type is defined for the returned block; this is a typedef
+for a structure whose contents are not externally defined. It is up to the
+caller to free the memory when it is no longer required. <p>
+Although the compiled
+code of a PCRE regex is relocatable, that is, it does not depend on memory
+location, the complete <b>pcre</b> data block is not fully relocatable, because
+it may contain a copy of the <i>tableptr</i> argument, which is an address (see
+below). <p>
+The <i>options</i> argument contains independent bits that affect the compilation.
+It should be zero if no options are required. The available options are
+described below. Some of them, in particular, those that are compatible
+with Perl, can also be set and unset from within the pattern (see the detailed
+description in the <b>pcrepattern</b> documentation). For these options, the
+contents of the <i>options</i> argument specify their initial settings at the
+start of compilation and execution. The PCRE_ANCHORED option can be set
+at the time of matching as well as at compile time. <p>
+If <i>errptr</i> is NULL, <b>pcre_compile()</b>
+returns NULL immediately. Otherwise, if compilation of a pattern fails,
+<b>pcre_compile()</b> returns NULL, and sets the variable pointed to by <i>errptr</i>
+to point to a textual error message. The offset from the start of the pattern
+to the character where the error was discovered is placed in the variable
+pointed to by <i>erroffset</i>, which must not be NULL. If it is, an immediate
+error is given. <p>
+If the final argument, <i>tableptr</i>, is NULL, PCRE uses a default
+set of character tables that are built when PCRE is compiled, using the
+default C locale. Otherwise, <i>tableptr</i> must be an address that is the result
+of a call to <b>pcre_maketables()</b>. This value is stored with the compiled pattern,
+and used again by <b>pcre_exec()</b>, unless another table pointer is passed to
+it. For more discussion, see the section on locale support below. <p>
+This code
+fragment shows a typical straightforward call to <b>pcre_compile()</b>: <p>
+ pcre
+*re;<br>
+ const char *error;<br>
+ int erroffset;<br>
+ re = pcre_compile(<br>
+ "^A.*Z", /* the pattern */<br>
+ 0, /* default options */<br>
+ &amp;error, /* for error message */<br>
+ &amp;erroffset, /* for error offset */<br>
+ NULL); /* use default character tables */<br>
+ <p>
+The following names for option bits are defined in the <b>pcre.h</b> header file:
+<p>
+ PCRE_ANCHORED<br>
+ <p>
+If this bit is set, the pattern is forced to be "anchored", that is, it
+is constrained to match only at the first matching point in the string
+that is being searched (the "subject string"). This effect can also be achieved
+by appropriate constructs in the pattern itself, which is the only way
+to do it in Perl. <p>
+ PCRE_AUTO_CALLOUT<br>
+ <p>
+If this bit is set, <b>pcre_compile()</b> automatically inserts callout items,
+all with number 255, before each pattern item. For discussion of the callout
+facility, see the <b>pcrecallout</b> documentation. <p>
+ PCRE_CASELESS<br>
+ <p>
+If this bit is set, letters in the pattern match both upper and lower
+case letters. It is equivalent to Perl&rsquo;s /i option, and it can be changed
+within a pattern by a (?i) option setting. When running in UTF-8 mode, case
+support for high-valued characters is available only when PCRE is built
+with Unicode character property support. <p>
+ PCRE_DOLLAR_ENDONLY<br>
+ <p>
+If this bit is set, a dollar metacharacter in the pattern matches only
+at the end of the subject string. Without this option, a dollar also matches
+immediately before the final character if it is a newline (but not before
+any other newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE
+is set. There is no equivalent to this option in Perl, and no way to set
+it within a pattern. <p>
+ PCRE_DOTALL<br>
+ <p>
+If this bit is set, a dot metacharacter in the pattern matches all characters,
+including newlines. Without it, newlines are excluded. This option is equivalent
+to Perl&rsquo;s /s option, and it can be changed within a pattern by a (?s) option
+setting. A negative class such as [^a] always matches a newline character,
+independent of the setting of this option. <p>
+ PCRE_EXTENDED<br>
+ <p>
+If this bit is set, whitespace data characters in the pattern are totally
+ignored except when escaped or inside a character class. Whitespace does
+not include the VT character (code 11). In addition, characters between
+an unescaped # outside a character class and the next newline character,
+inclusive, are also ignored. This is equivalent to Perl&rsquo;s /x option, and
+it can be changed within a pattern by a (?x) option setting. <p>
+This option
+makes it possible to include comments inside complicated patterns. Note,
+however, that this applies only to data characters. Whitespace characters
+may never appear within special character sequences in a pattern, for example
+within the sequence (?( which introduces a conditional subpattern. <p>
+ PCRE_EXTRA<br>
+ <p>
+This option was invented in order to turn on additional functionality
+of PCRE that is incompatible with Perl, but it is currently of very little
+use. When set, any backslash in a pattern that is followed by a letter that
+has no special meaning causes an error, thus reserving these combinations
+for future expansion. By default, as in Perl, a backslash followed by a
+letter with no special meaning is treated as a literal. There are at present
+no other features controlled by this option. It can also be set by a (?X)
+option setting within a pattern. <p>
+ PCRE_MULTILINE<br>
+ <p>
+By default, PCRE treats the subject string as consisting of a single line
+of characters (even if it actually contains newlines). The "start of line"
+metacharacter (^) matches only at the start of the string, while the "end
+of line" metacharacter ($) matches only at the end of the string, or before
+a terminating newline (unless PCRE_DOLLAR_ENDONLY is set). This is the same
+as Perl. <p>
+When PCRE_MULTILINE is set, the "start of line" and "end of
+line" constructs match immediately following or immediately before any
+newline in the subject string, respectively, as well as at the very start
+and end. This is equivalent to Perl&rsquo;s /m option, and it can be changed within
+a pattern by a (?m) option setting. If there are no "\n" characters in a
+subject string, or no occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE
+has no effect. <p>
+ PCRE_NO_AUTO_CAPTURE<br>
+ <p>
+If this option is set, it disables the use of numbered capturing parentheses
+in the pattern. Any opening parenthesis that is not followed by ? behaves
+as if it were followed by ?: but named parentheses can still be used for
+capturing (and they acquire numbers in the usual way). There is no equivalent
+of this option in Perl. <p>
+ PCRE_UNGREEDY<br>
+ <p>
+This option inverts the "greediness" of the quantifiers so that they are
+not greedy by default, but become greedy if followed by "?". It is not compatible
+with Perl. It can also be set by a (?U) option setting within the pattern.
+<p>
+ PCRE_UTF8<br>
+ <p>
+This option causes PCRE to regard both the pattern and the subject as
+strings of UTF-8 characters instead of single-byte character strings. However,
+it is available only when PCRE is built to include UTF-8 support. If not,
+the use of this option provokes an error. Details of how this option changes
+the behaviour of PCRE are given in the section on UTF-8 support in the
+main <b>pcre</b> page. <p>
+ PCRE_NO_UTF8_CHECK<br>
+ <p>
+When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is
+automatically checked. If an invalid UTF-8 sequence of bytes is found, <b>pcre_compile()</b>
+returns an error. If you already know that your pattern is valid, and you
+want to skip this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK
+option. When it is set, the effect of passing an invalid UTF-8 string as
+a pattern is undefined. It may cause your program to crash. Note that this
+option can also be passed to <b>pcre_exec()</b>, to suppress the UTF-8 validity
+checking of subject strings.
+<h2><a name='sect7' href='#toc7'>Studying a Pattern</a></h2>
+ <p>
+<b>pcre_extra *pcre_study(const
+pcre *<i>code</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>);</b> <p>
+If a compiled pattern is
+going to be used several times, it is worth spending more time analyzing
+it in order to speed up the time taken for matching. The function <b>pcre_study()</b>
+takes a pointer to a compiled pattern as its first argument. If studying
+the pattern produces additional information that will help speed up matching,
+<b>pcre_study()</b> returns a pointer to a <b>pcre_extra</b> block, in which the <i>study_data</i>
+field points to the results of the study. <p>
+The returned value from <b>pcre_study()</b>
+can be passed directly to <b>pcre_exec()</b>. However, a <b>pcre_extra</b> block also
+contains other fields that can be set by the caller before the block is
+passed; these are described below in the section on matching a pattern.
+<p>
+If studying the pattern does not produce any additional information, <b>pcre_study()</b>
+returns NULL. In that circumstance, if the calling program wants to pass
+any of the other fields to <b>pcre_exec()</b>, it must set up its own <b>pcre_extra</b>
+block. <p>
+The second argument of <b>pcre_study()</b> contains option bits. At present,
+no options are defined, and this argument should always be zero. <p>
+The third
+argument for <b>pcre_study()</b> is a pointer for an error message. If studying
+succeeds (even if no data is returned), the variable it points to is set
+to NULL. Otherwise it points to a textual error message. You should therefore
+test the error pointer for NULL after calling <b>pcre_study()</b>, to be sure
+that it has run successfully. <p>
+This is a typical call to <b>pcre_study()</b>: <p>
+
+pcre_extra *pe;<br>
+ pe = pcre_study(<br>
+ re, /* result of pcre_compile() */<br>
+ 0, /* no options exist */<br>
+ &amp;error); /* set to NULL or points to a message */<br>
+ <p>
+At present, studying a pattern is useful only for non-anchored patterns
+that do not have a single fixed starting character. A bitmap of possible
+starting bytes is created.
+<h2><a name='sect8' href='#toc8'>Locale Support</a></h2>
+ <p>
+PCRE handles caseless matching,
+and determines whether characters are letters, digits, or whatever, by
+reference to a set of tables, indexed by character value. (When running
+in UTF-8 mode, this applies only to characters with codes less than 128.
+Higher-valued codes never match escapes such as \w or \d, but can be tested
+with \p if PCRE is built with Unicode character property support.) <p>
+An internal
+set of tables is created in the default C locale when PCRE is built. This
+is used when the final argument of <b>pcre_compile()</b> is NULL, and is sufficient
+for many applications. An alternative set of tables can, however, be supplied.
+These may be created in a different locale from the default. As more and
+more applications change to using Unicode, the need for this locale support
+is expected to die away. <p>
+External tables are built by calling the <b>pcre_maketables()</b>
+function, which has no arguments, in the relevant locale. The result can
+then be passed to <b>pcre_compile()</b> or <b>pcre_exec()</b> as often as necessary. For
+example, to build and use tables that are appropriate for the French locale
+(where accented characters with values greater than 128 are treated as
+letters), the following code could be used: <p>
+ setlocale(LC_CTYPE, "fr_FR");<br>
+ tables = pcre_maketables();<br>
+ re = pcre_compile(..., tables);<br>
+ <p>
+When <b>pcre_maketables()</b> runs, the tables are built in memory that is obtained
+via <b>pcre_malloc</b>. It is the caller&rsquo;s responsibility to ensure that the memory
+containing the tables remains available for as long as it is needed. <p>
+The
+pointer that is passed to <b>pcre_compile()</b> is saved with the compiled pattern,
+and the same tables are used via this pointer by <b>pcre_study()</b> and normally
+also by <b>pcre_exec()</b>. Thus, by default, for any single pattern, compilation,
+studying and matching all happen in the same locale, but different patterns
+can be compiled in different locales. <p>
+It is possible to pass a table pointer
+or NULL (indicating the use of the internal tables) to <b>pcre_exec()</b>. Although
+not intended for this purpose, this facility could be used to match a pattern
+in a different locale from the one in which it was compiled. Passing table
+pointers at run time is discussed below in the section on matching a pattern.
+
+<h2><a name='sect9' href='#toc9'>Information About a Pattern</a></h2>
+ <p>
+<b>int pcre_fullinfo(const pcre *<i>code</i>, const
+pcre_extra *<i>extra</i>,</b> <b>int <i>what</i>, void *<i>where</i>);</b> <p>
+The <b>pcre_fullinfo()</b> function
+returns information about a compiled pattern. It replaces the obsolete <b>pcre_info()</b>
+function, which is nevertheless retained for backwards compatibility (and
+is documented below). <p>
+The first argument for <b>pcre_fullinfo()</b> is a pointer
+to the compiled pattern. The second argument is the result of <b>pcre_study()</b>,
+or NULL if the pattern was not studied. The third argument specifies which
+piece of information is required, and the fourth argument is a pointer
+to a variable to receive the data. The yield of the function is zero for
+success, or one of the following negative numbers: <p>
+ PCRE_ERROR_NULL
+ the argument <i>code</i> was NULL<br>
+ the argument <i>where</i> was NULL<br>
+ PCRE_ERROR_BADMAGIC the "magic number" was not found<br>
+ PCRE_ERROR_BADOPTION the value of <i>what</i> was invalid<br>
+ <p>
+The "magic number" is placed at the start of each compiled pattern as
+a simple check against passing an arbitrary memory pointer. Here is a typical
+call of <b>pcre_fullinfo()</b>, to obtain the length of the compiled pattern:
+<p>
+ int rc;<br>
+ size_t length;<br>
+ rc = pcre_fullinfo(<br>
+ re, /* result of pcre_compile() */<br>
+ pe, /* result of pcre_study(), or NULL */<br>
+ PCRE_INFO_SIZE, /* what is required */<br>
+ &amp;length); /* where to put the data */<br>
+ <p>
+The possible values for the third argument are defined in <b>pcre.h</b>, and are
+as follows: <p>
+ PCRE_INFO_BACKREFMAX<br>
+ <p>
+Return the number of the highest back reference in the pattern. The fourth
+argument should point to an <b>int</b> variable. Zero is returned if there are
+no back references. <p>
+ PCRE_INFO_CAPTURECOUNT<br>
+ <p>
+Return the number of capturing subpatterns in the pattern. The fourth argument
+should point to an <b>int</b> variable. <p>
+ PCRE_INFO_DEFAULTTABLES<br>
+ <p>
+Return a pointer to the internal default character tables within PCRE.
+The fourth argument should point to an <b>unsigned char *</b> variable. This information
+call is provided for internal use by the <b>pcre_study()</b> function. External
+callers can cause PCRE to use its internal tables by passing a NULL table
+pointer. <p>
+ PCRE_INFO_FIRSTBYTE<br>
+ <p>
+Return information about the first byte of any matched string, for a non-anchored
+pattern. (This option used to be called PCRE_INFO_FIRSTCHAR; the old name
+is still recognized for backwards compatibility.) <p>
+If there is a fixed first
+byte, for example, from a pattern such as (cat|cow|coyote), it is returned
+in the integer pointed to by <i>where</i>. Otherwise, if either <p>
+(a) the pattern
+was compiled with the PCRE_MULTILINE option, and every branch starts with
+"^", or <p>
+(b) every branch of the pattern starts with ".*" and PCRE_DOTALL
+is not set (if it were set, the pattern would be anchored), <p>
+-1 is returned,
+indicating that the pattern matches only at the start of a subject string
+or after any newline within the string. Otherwise -2 is returned. For anchored
+patterns, -2 is returned. <p>
+ PCRE_INFO_FIRSTTABLE<br>
+ <p>
+If the pattern was studied, and this resulted in the construction of a
+256-bit table indicating a fixed set of bytes for the first byte in any
+matching string, a pointer to the table is returned. Otherwise NULL is returned.
+The fourth argument should point to an <b>unsigned char *</b> variable. <p>
+ PCRE_INFO_LASTLITERAL<br>
+ <p>
+Return the value of the rightmost literal byte that must exist in any
+matched string, other than at its start, if such a byte has been recorded.
+The fourth argument should point to an <b>int</b> variable. If there is no such
+byte, -1 is returned. For anchored patterns, a last literal byte is recorded
+only if it follows something of variable length. For example, for the pattern
+/^a\d+z\d+/ the returned value is "z", but for /^a\dz\d/ the returned value is
+-1. <p>
+ PCRE_INFO_NAMECOUNT<br>
+ PCRE_INFO_NAMEENTRYSIZE<br>
+ PCRE_INFO_NAMETABLE<br>
+ <p>
+PCRE supports the use of named as well as numbered capturing parentheses.
+The names are just an additional way of identifying the parentheses, which
+still acquire numbers. A convenience function called <b>pcre_get_named_substring()</b>
+is provided for extracting an individual captured substring by name. It
+is also possible to extract the data directly, by first converting the
+name to a number in order to access the correct pointers in the output
+vector (described with <b>pcre_exec()</b> below). To do the conversion, you need
+to use the name-to-number map, which is described by these three values. <p>
+The
+map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT gives
+the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each
+entry; both of these return an <b>int</b> value. The entry size depends on the
+length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the
+first entry of the table (a pointer to <b>char</b>). The first two bytes of each
+entry are the number of the capturing parenthesis, most significant byte
+first. The rest of the entry is the corresponding name, zero terminated.
+The names are in alphabetical order. For example, consider the following
+pattern (assume PCRE_EXTENDED is set, so white space - including newlines
+- is ignored): <p>
+ (?P&lt;date&gt; (?P&lt;year&gt;(\d\d)?\d\d) -<br>
+ (?P&lt;month&gt;\d\d) - (?P&lt;day&gt;\d\d) )<br>
+ <p>
+There are four named subpatterns, so the table has four entries, and each
+entry in the table is eight bytes long. The table is as follows, with non-printing
+bytes shown in hexadecimal, and undefined bytes shown as ??: <p>
+ 00 01 d
+ a t e 00 ??<br>
+ 00 05 d a y 00 ?? ??<br>
+ 00 04 m o n t h 00<br>
+ 00 02 y e a r 00 ??<br>
+ <p>
+When writing code to extract data from named subpatterns using the name-to-number
+map, remember that the length of each entry is likely to be different for
+each compiled pattern. <p>
+ PCRE_INFO_OPTIONS<br>
+ <p>
+Return a copy of the options with which the pattern was compiled. The fourth
+argument should point to an <b>unsigned long int</b> variable. These option bits
+are those specified in the call to <b>pcre_compile()</b>, modified by any top-level
+option settings within the pattern itself. <p>
+A pattern is automatically anchored
+by PCRE if all of its top-level alternatives begin with one of the following:
+<p>
+ ^ unless PCRE_MULTILINE is set<br>
+ \A always<br>
+ \G always<br>
+ .* if PCRE_DOTALL is set and there are no back<br>
+ references to the subpattern in which .* appears<br>
+ <p>
+For such patterns, the PCRE_ANCHORED bit is set in the options returned
+by <b>pcre_fullinfo()</b>. <p>
+ PCRE_INFO_SIZE<br>
+ <p>
+Return the size of the compiled pattern, that is, the value that was passed
+as the argument to <b>pcre_malloc()</b> when PCRE was getting memory in which
+to place the compiled data. The fourth argument should point to a <b>size_t</b>
+variable. <p>
+ PCRE_INFO_STUDYSIZE<br>
+ <p>
+Return the size of the data block pointed to by the <i>study_data</i> field in
+a <b>pcre_extra</b> block. That is, it is the value that was passed to <b>pcre_malloc()</b>
+when PCRE was getting memory into which to place the data created by <b>pcre_study()</b>.
+The fourth argument should point to a <b>size_t</b> variable.
+<h2><a name='sect10' href='#toc10'>Obsolete Info Function</a></h2>
+
+<p>
+<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b> <b>*<i>firstcharptr</i>);</b> <p>
+The <b>pcre_info()</b>
+function is now obsolete because its interface is too restrictive to return
+all the available data about a compiled pattern. New programs should use
+<b>pcre_fullinfo()</b> instead. The yield of <b>pcre_info()</b> is the number of capturing
+subpatterns, or one of the following negative numbers: <p>
+ PCRE_ERROR_NULL
+ the argument <i>code</i> was NULL<br>
+ PCRE_ERROR_BADMAGIC the "magic number" was not found<br>
+ <p>
+If the <i>optptr</i> argument is not NULL, a copy of the options with which the
+pattern was compiled is placed in the integer it points to (see PCRE_INFO_OPTIONS
+above). <p>
+If the pattern is not anchored and the <i>firstcharptr</i> argument is
+not NULL, it is used to pass back information about the first character
+of any matched string (see PCRE_INFO_FIRSTBYTE above).
+<h2><a name='sect11' href='#toc11'>Matching a Pattern</a></h2>
+
+<p>
+<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>const char
+*<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b> <b>int <i>options</i>, int *<i>ovector</i>, int
+<i>ovecsize</i>);</b> <p>
+The function <b>pcre_exec()</b> is called to match a subject string
+against a compiled pattern, which is passed in the <i>code</i> argument. If the
+pattern has been studied, the result of the study should be passed in the
+<i>extra</i> argument. <p>
+In most applications, the pattern will have been compiled
+(and optionally studied) in the same process that calls <b>pcre_exec()</b>. However,
+it is possible to save compiled patterns and study data, and then use them
+later in different processes, possibly even on different hosts. For a discussion
+about this, see the <b>pcreprecompile</b> documentation. <p>
+Here is an example of
+a simple call to <b>pcre_exec()</b>: <p>
+ int rc;<br>
+ int ovector[30];<br>
+ rc = pcre_exec(<br>
+ re, /* result of pcre_compile() */<br>
+ NULL, /* we didn&rsquo;t study the pattern */<br>
+ "some string", /* the subject string */<br>
+ 11, /* the length of the subject string */<br>
+ 0, /* start at offset 0 in the subject */<br>
+ 0, /* default options */<br>
+ ovector, /* vector of integers for substring information */<br>
+ 30); /* number of elements in the vector (NOT size in bytes)
+*/<br>
+
+<h3><a name='sect12' href='#toc12'>Extra data for <b>pcre_exec()</b></a></h3>
+ <p>
+If the <i>extra</i> argument is not NULL, it must
+point to a <b>pcre_extra</b> data block. The <b>pcre_study()</b> function returns such
+a block (when it doesn&rsquo;t return NULL), but you can also create one for yourself,
+and pass additional information in it. The fields in a <b>pcre_extra</b> block
+are as follows: <p>
+ unsigned long int <i>flags</i>;<br>
+ void *<i>study_data</i>;<br>
+ unsigned long int <i>match_limit</i>;<br>
+ void *<i>callout_data</i>;<br>
+ const unsigned char *<i>tables</i>;<br>
+ <p>
+The <i>flags</i> field is a bitmap that specifies which of the other fields are
+set. The flag bits are: <p>
+ PCRE_EXTRA_STUDY_DATA<br>
+ PCRE_EXTRA_MATCH_LIMIT<br>
+ PCRE_EXTRA_CALLOUT_DATA<br>
+ PCRE_EXTRA_TABLES<br>
+ <p>
+Other flag bits should be set to zero. The <i>study_data</i> field is set in the
+<b>pcre_extra</b> block that is returned by <b>pcre_study()</b>, together with the appropriate
+flag bit. You should not set this yourself, but you may add to the block
+by setting the other fields and their corresponding flag bits. <p>
+The <i>match_limit</i>
+field provides a means of preventing PCRE from using up a vast amount of
+resources when running patterns that are not going to match, but which
+have a very large number of possibilities in their search trees. The classic
+example is the use of nested unlimited repeats. <p>
+Internally, PCRE uses a
+function called <b>match()</b> which it calls repeatedly (sometimes recursively).
+The limit is imposed on the number of times this function is called during
+a match, which has the effect of limiting the amount of recursion and backtracking
+that can take place. For patterns that are not anchored, the count starts
+from zero for each position in the subject string. <p>
+The default limit for
+the library can be set when PCRE is built; the default default is 10 million,
+which handles all but the most extreme cases. You can reduce the default
+by supplying <b>pcre_exec()</b> with a <b>pcre_extra</b> block in which <i>match_limit</i> is
+set to a smaller value, and PCRE_EXTRA_MATCH_LIMIT is set in the <i>flags</i>
+field. If the limit is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_MATCHLIMIT.
+<p>
+The <i>callout_data</i> field is used in conjunction with the "callout" feature,
+which is described in the <b>pcrecallout</b> documentation. <p>
+The <i>tables</i> field
+is used to pass a character tables pointer to <b>pcre_exec()</b>; this overrides
+the value that is stored with the compiled pattern. A non-NULL value is stored
+with the compiled pattern only if custom tables were supplied to <b>pcre_compile()</b>
+via its <i>tableptr</i> argument. If NULL is passed to <b>pcre_exec()</b> using this mechanism,
+it forces PCRE&rsquo;s internal tables to be used. This facility is helpful when
+re-using patterns that have been saved after compiling with an external
+set of tables, because the external tables might be at a different address
+when <b>pcre_exec()</b> is called. See the <b>pcreprecompile</b> documentation for a
+discussion of saving compiled patterns for later use.
+<h3><a name='sect13' href='#toc13'>Option bits for <b>pcre_exec()</b></a></h3>
+
+<p>
+The unused bits of the <i>options</i> argument for <b>pcre_exec()</b> must be zero. The
+only bits that may be set are PCRE_ANCHORED, PCRE_NOTBOL, PCRE_NOTEOL,
+PCRE_NOTEMPTY, PCRE_NO_UTF8_CHECK and PCRE_PARTIAL. <p>
+ PCRE_ANCHORED<br>
+ <p>
+The PCRE_ANCHORED option limits <b>pcre_exec()</b> to matching at the first matching
+position. If a pattern was compiled with PCRE_ANCHORED, or turned out to
+be anchored by virtue of its contents, it cannot be made unanchored at matching
+time. <p>
+ PCRE_NOTBOL<br>
+ <p>
+This option specifies that the first character of the subject string is not
+the beginning of a line, so the circumflex metacharacter should not match
+before it. Setting this without PCRE_MULTILINE (at compile time) causes
+circumflex never to match. This option affects only the behaviour of the
+circumflex metacharacter. It does not affect \A. <p>
+ PCRE_NOTEOL<br>
+ <p>
+This option specifies that the end of the subject string is not the end
+of a line, so the dollar metacharacter should not match it nor (except
+in multiline mode) a newline immediately before it. Setting this without
+PCRE_MULTILINE (at compile time) causes dollar never to match. This option
+affects only the behaviour of the dollar metacharacter. It does not affect
+\Z or \z. <p>
+ PCRE_NOTEMPTY<br>
+ <p>
+An empty string is not considered to be a valid match if this option is
+set. If there are alternatives in the pattern, they are tried. If all the
+alternatives match the empty string, the entire match fails. For example,
+if the pattern <p>
+ a?b?<br>
+ <p>
+is applied to a string not beginning with "a" or "b", it matches the empty
+string at the start of the subject. With PCRE_NOTEMPTY set, this match is
+not valid, so PCRE searches further into the string for occurrences of
+"a" or "b". <p>
+Perl has no direct equivalent of PCRE_NOTEMPTY, but it does
+make a special case of a pattern match of the empty string within its <b>split()</b>
+function, and when using the /g modifier. It is possible to emulate Perl&rsquo;s
+behaviour after matching a null string by first trying the match again
+at the same offset with PCRE_NOTEMPTY and PCRE_ANCHORED, and then if that
+fails by advancing the starting offset (see below) and trying an ordinary
+match again. There is some code that demonstrates how to do this in the
+<i>pcredemo.c</i> sample program. <p>
+ PCRE_NO_UTF8_CHECK<br>
+ <p>
+When PCRE_UTF8 is set at compile time, the validity of the subject as
+a UTF-8 string is automatically checked when <b>pcre_exec()</b> is subsequently
+called. The value of <i>startoffset</i> is also checked to ensure that it points
+to the start of a UTF-8 character. If an invalid UTF-8 sequence of bytes is
+found, <b>pcre_exec()</b> returns the error PCRE_ERROR_BADUTF8. If <i>startoffset</i>
+contains an invalid value, PCRE_ERROR_BADUTF8_OFFSET is returned. <p>
+If you
+already know that your subject is valid, and you want to skip these checks
+for performance reasons, you can set the PCRE_NO_UTF8_CHECK option when
+calling <b>pcre_exec()</b>. You might want to do this for the second and subsequent
+calls to <b>pcre_exec()</b> if you are making repeated calls to find all the matches
+in a single subject string. However, you should be sure that the value of
+<i>startoffset</i> points to the start of a UTF-8 character. When PCRE_NO_UTF8_CHECK
+is set, the effect of passing an invalid UTF-8 string as a subject, or a
+value of <i>startoffset</i> that does not point to the start of a UTF-8 character,
+is undefined. Your program may crash. <p>
+ PCRE_PARTIAL<br>
+ <p>
+This option turns on the partial matching feature. If the subject string
+fails to match the pattern, but at some point during the matching process
+the end of the subject was reached (that is, the subject partially matches
+the pattern and the failure to match occurred only because there were not
+enough subject characters), <b>pcre_exec()</b> returns PCRE_ERROR_PARTIAL instead
+of PCRE_ERROR_NOMATCH. When PCRE_PARTIAL is used, there are restrictions
+on what may appear in the pattern. These are discussed in the <b>pcrepartial</b>
+ documentation.
+<h3><a name='sect14' href='#toc14'>The string to be matched by <b>pcre_exec()</b></a></h3>
+ <p>
+The subject string
+is passed to <b>pcre_exec()</b> as a pointer in <i>subject</i>, a length in <i>length</i>, and
+a starting byte offset in <i>startoffset</i>. In UTF-8 mode, the byte offset must
+point to the start of a UTF-8 character. Unlike the pattern string, the subject
+may contain binary zero bytes. When the starting offset is zero, the search
+for a match starts at the beginning of the subject, and this is by far
+the most common case. <p>
+A non-zero starting offset is useful when searching
+for another match in the same subject by calling <b>pcre_exec()</b> again after
+a previous success. Setting <i>startoffset</i> differs from just passing over a
+shortened string and setting PCRE_NOTBOL in the case of a pattern that
+begins with any kind of lookbehind. For example, consider the pattern <p>
+
+\Biss\B<br>
+ <p>
+which finds occurrences of "iss" in the middle of words. (\B matches only
+if the current position in the subject is not a word boundary.) When applied
+to the string "Mississippi" the first call to <b>pcre_exec()</b> finds the first
+occurrence. If <b>pcre_exec()</b> is called again with just the remainder of the
+subject, namely "issippi", it does not match, because \B is always false
+at the start of the subject, which is deemed to be a word boundary. However,
+if <b>pcre_exec()</b> is passed the entire string again, but with <i>startoffset</i>
+set to 4, it finds the second occurrence of "iss" because it is able to
+look behind the starting point to discover that it is preceded by a letter.
+<p>
+If a non-zero starting offset is passed when the pattern is anchored, one
+attempt to match at the given offset is made. This can only succeed if the
+pattern does not require the match to be at the start of the subject.
+<h3><a name='sect15' href='#toc15'>How
+<b>pcre_exec()</b> returns captured substrings</a></h3>
+ <p>
+In general, a pattern matches a
+certain portion of the subject, and in addition, further substrings from
+the subject may be picked out by parts of the pattern. Following the usage
+in Jeffrey Friedl&rsquo;s book, this is called "capturing" in what follows, and
+the phrase "capturing subpattern" is used for a fragment of a pattern that
+picks out a substring. PCRE supports several other kinds of parenthesized
+subpattern that do not cause substrings to be captured. <p>
+Captured substrings
+are returned to the caller via a vector of integer offsets whose address
+is passed in <i>ovector</i>. The number of elements in the vector is passed in
+<i>ovecsize</i>, which must be a non-negative number. <b>Note</b>: this argument is NOT
+the size of <i>ovector</i> in bytes. <p>
+The first two-thirds of the vector is used
+to pass back captured substrings, each substring using a pair of integers.
+The remaining third of the vector is used as workspace by <b>pcre_exec()</b> while
+matching capturing subpatterns, and is not available for passing back information.
+The length passed in <i>ovecsize</i> should always be a multiple of three. If it
+is not, it is rounded down. <p>
+When a match is successful, information about
+captured substrings is returned in pairs of integers, starting at the beginning
+of <i>ovector</i>, and continuing up to two-thirds of its length at the most. The
+first element of a pair is set to the offset of the first character in
+a substring, and the second is set to the offset of the first character
+after the end of a substring. The first pair, <i>ovector[0]</i> and <i>ovector[1]</i>,
+identify the portion of the subject string matched by the entire pattern.
+The next pair is used for the first capturing subpattern, and so on. The
+value returned by <b>pcre_exec()</b> is the number of pairs that have been set.
+If there are no capturing subpatterns, the return value from a successful
+match is 1, indicating that just the first pair of offsets has been set.
+<p>
+Some convenience functions are provided for extracting the captured substrings
+as separate strings. These are described in the following section. <p>
+It is
+possible for a capturing subpattern number <i>n+1</i> to match some part of the
+subject when subpattern <i>n</i> has not been used at all. For example, if the
+string "abc" is matched against the pattern (a|(z))(bc) subpatterns 1 and
+3 are matched, but 2 is not. When this happens, both offset values corresponding
+to the unused subpattern are set to -1. <p>
+If a capturing subpattern is matched
+repeatedly, it is the last portion of the string that it matched that is
+returned. <p>
+If the vector is too small to hold all the captured substring
+offsets, it is used as far as possible (up to two-thirds of its length),
+and the function returns a value of zero. In particular, if the substring
+offsets are not of interest, <b>pcre_exec()</b> may be called with <i>ovector</i> passed
+as NULL and <i>ovecsize</i> as zero. However, if the pattern contains back references
+and the <i>ovector</i> is not big enough to remember the related substrings, PCRE
+has to get additional memory for use during matching. Thus it is usually
+advisable to supply an <i>ovector</i>. <p>
+Note that <b>pcre_info()</b> can be used to find
+out how many capturing subpatterns there are in a compiled pattern. The
+smallest size for <i>ovector</i> that will allow for <i>n</i> captured substrings, in
+addition to the offsets of the substring matched by the whole pattern,
+is (<i>n</i>+1)*3.
+<h3><a name='sect16' href='#toc16'>Return values from <b>pcre_exec()</b></a></h3>
+ <p>
+If <b>pcre_exec()</b> fails, it returns
+a negative number. The following are defined in the header file: <p>
+ PCRE_ERROR_NOMATCH
+ (-1)<br>
+ <p>
+The subject string did not match the pattern. <p>
+ PCRE_ERROR_NULL
+ (-2)<br>
+ <p>
+Either <i>code</i> or <i>subject</i> was passed as NULL, or <i>ovector</i> was NULL and <i>ovecsize</i>
+was not zero. <p>
+ PCRE_ERROR_BADOPTION (-3)<br>
+ <p>
+An unrecognized bit was set in the <i>options</i> argument. <p>
+ PCRE_ERROR_BADMAGIC
+ (-4)<br>
+ <p>
+PCRE stores a 4-byte "magic number" at the start of the compiled code,
+to catch the case when it is passed a junk pointer and to detect when a
+pattern that was compiled in an environment of one endianness is run in
+an environment with the other endianness. This is the error that PCRE gives
+when the magic number is not present. <p>
+ PCRE_ERROR_UNKNOWN_NODE (-5)<br>
+ <p>
+While running the pattern match, an unknown item was encountered in the
+compiled pattern. This error could be caused by a bug in PCRE or by overwriting
+of the compiled pattern. <p>
+ PCRE_ERROR_NOMEMORY (-6)<br>
+ <p>
+If a pattern contains back references, but the <i>ovector</i> that is passed
+to <b>pcre_exec()</b> is not big enough to remember the referenced substrings,
+PCRE gets a block of memory at the start of matching to use for this purpose.
+If the call via <b>pcre_malloc()</b> fails, this error is given. The memory is
+automatically freed at the end of matching. <p>
+ PCRE_ERROR_NOSUBSTRING
+(-7)<br>
+ <p>
+This error is used by the <b>pcre_copy_substring()</b>, <b>pcre_get_substring()</b>,
+and <b>pcre_get_substring_list()</b> functions (see below). It is never returned
+by <b>pcre_exec()</b>. <p>
+ PCRE_ERROR_MATCHLIMIT (-8)<br>
+ <p>
+The recursion and backtracking limit, as specified by the <i>match_limit</i>
+field in a <b>pcre_extra</b> structure (or defaulted) was reached. See the description
+above. <p>
+ PCRE_ERROR_CALLOUT (-9)<br>
+ <p>
+This error is never generated by <b>pcre_exec()</b> itself. It is provided for
+use by callout functions that want to yield a distinctive error code. See
+the <b>pcrecallout</b> documentation for details. <p>
+ PCRE_ERROR_BADUTF8
+ (-10)<br>
+ <p>
+A string that contains an invalid UTF-8 byte sequence was passed as a subject.
+<p>
+ PCRE_ERROR_BADUTF8_OFFSET (-11)<br>
+ <p>
+The UTF-8 byte sequence that was passed as a subject was valid, but the
+value of <i>startoffset</i> did not point to the beginning of a UTF-8 character.
+<p>
+ PCRE_ERROR_PARTIAL (-12)<br>
+ <p>
+The subject string did not match, but it did match partially. See the
+<b>pcrepartial</b> documentation for details of partial matching. <p>
+ PCRE_ERROR_BAD_PARTIAL
+(-13)<br>
+ <p>
+The PCRE_PARTIAL option was used with a compiled pattern containing items
+that are not supported for partial matching. See the <b>pcrepartial</b> documentation
+for details of partial matching. <p>
+ PCRE_ERROR_INTERNAL (-14)<br>
+ <p>
+An unexpected internal error has occurred. This error could be caused by
+a bug in PCRE or by overwriting of the compiled pattern. <p>
+ PCRE_ERROR_BADCOUNT
+(-15)<br>
+ <p>
+This error is given if the value of the <i>ovecsize</i> argument is negative.
+
+<h2><a name='sect17' href='#toc17'>Extracting Captured Substrings by Number</a></h2>
+ <p>
+<b>int pcre_copy_substring(const
+char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
+ <b>int <i>buffersize</i>);</b> <p>
+<br>
+<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>,</b> <b>const char **<i>stringptr</i>);</b> <p>
+<br>
+<b>int pcre_get_substring_list(const char *<i>subject</i>,</b> <b>int *<i>ovector</i>, int <i>stringcount</i>,
+const char ***<i>listptr</i>);</b> <p>
+Captured substrings can be accessed directly
+by using the offsets returned by <b>pcre_exec()</b> in <i>ovector</i>. For convenience,
+the functions <b>pcre_copy_substring()</b>, <b>pcre_get_substring()</b>, and <b>pcre_get_substring_list()</b>
+are provided for extracting captured substrings as new, separate, zero-terminated
+strings. These functions identify substrings by number. The next section
+describes functions for extracting named substrings. A substring that contains
+a binary zero is correctly extracted and has a further zero added on the
+end, but the result is not, of course, a C string. <p>
+The first three arguments
+are the same for all three of these functions: <i>subject</i> is the subject string
+that has just been successfully matched, <i>ovector</i> is a pointer to the vector
+of integer offsets that was passed to <b>pcre_exec()</b>, and <i>stringcount</i> is the
+number of substrings that were captured by the match, including the substring
+that matched the entire regular expression. This is the value returned by
+<b>pcre_exec()</b> if it is greater than zero. If <b>pcre_exec()</b> returned zero, indicating
+that it ran out of space in <i>ovector</i>, the value passed as <i>stringcount</i> should
+be the number of elements in the vector divided by three. <p>
+The functions
+<b>pcre_copy_substring()</b> and <b>pcre_get_substring()</b> extract a single substring,
+whose number is given as <i>stringnumber</i>. A value of zero extracts the substring
+that matched the entire pattern, whereas higher values extract the captured
+substrings. For <b>pcre_copy_substring()</b>, the string is placed in <i>buffer</i>, whose
+length is given by <i>buffersize</i>, while for <b>pcre_get_substring()</b> a new block
+of memory is obtained via <b>pcre_malloc</b>, and its address is returned via
+<i>stringptr</i>. The yield of the function is the length of the string, not including
+the terminating zero, or one of <p>
+ PCRE_ERROR_NOMEMORY (-6)<br>
+ <p>
+The buffer was too small for <b>pcre_copy_substring()</b>, or the attempt to
+get memory failed for <b>pcre_get_substring()</b>. <p>
+ PCRE_ERROR_NOSUBSTRING
+(-7)<br>
+ <p>
+There is no substring whose number is <i>stringnumber</i>. <p>
+The <b>pcre_get_substring_list()</b>
+function extracts all available substrings and builds a list of pointers
+to them. All this is done in a single block of memory that is obtained via
+<b>pcre_malloc</b>. The address of the memory block is returned via <i>listptr</i>, which
+is also the start of the list of string pointers. The end of the list is
+marked by a NULL pointer. The yield of the function is zero if all went
+well, or <p>
+ PCRE_ERROR_NOMEMORY (-6)<br>
+ <p>
+if the attempt to get the memory block failed. <p>
+When any of these functions
+encounter a substring that is unset, which can happen when capturing subpattern
+number <i>n+1</i> matches some part of the subject, but subpattern <i>n</i> has not been
+used at all, they return an empty string. This can be distinguished from
+a genuine zero-length substring by inspecting the appropriate offset in
+<i>ovector</i>, which is negative for unset substrings. <p>
+The two convenience functions
+<b>pcre_free_substring()</b> and <b>pcre_free_substring_list()</b> can be used to free
+the memory returned by a previous call of <b>pcre_get_substring()</b> or <b>pcre_get_substring_list()</b>,
+respectively. They do nothing more than call the function pointed to by
+<b>pcre_free</b>, which of course could be called directly from a C program. However,
+PCRE is used in some situations where it is linked via a special interface
+to another programming language which cannot use <b>pcre_free</b> directly; it
+is for these cases that the functions are provided.
+<h2><a name='sect18' href='#toc18'>Extracting Captured
+Substrings by Name</a></h2>
+ <p>
+<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b> <b>const char
+*<i>name</i>);</b> <p>
+<br>
+<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>char *<i>buffer</i>, int
+<i>buffersize</i>);</b> <p>
+<br>
+<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>const char **<i>stringptr</i>);</b>
+<p>
+To extract a substring by name, you first have to find the associated number.
+For example, for this pattern <p>
+ (a+)b(?&lt;xxx&gt;\d+)...<br>
+ <p>
+the number of the subpattern called "xxx" is 2. You can find the number
+from the name by calling <b>pcre_get_stringnumber()</b>. The first argument is
+the compiled pattern, and the second is the name. The yield of the function
+is the subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if there is no
+subpattern of that name. <p>
+Given the number, you can extract the substring
+directly, or use one of the functions described in the previous section.
+For convenience, there are also two functions that do the whole job. <p>
+Most
+of the arguments of <i>pcre_copy_named_substring()</i> and <i>pcre_get_named_substring()</i>
+are the same as those for the similarly named functions that extract by
+number. As these are described in the previous section, they are not re-described
+here. There are just two differences: <p>
+First, instead of a substring number,
+a substring name is given. Second, there is an extra argument, given at
+the start, which is a pointer to the compiled pattern. This is needed in
+order to gain access to the name-to-number translation table. <p>
+These functions
+call <b>pcre_get_stringnumber()</b>, and if it succeeds, they then call <i>pcre_copy_substring()</i>
+or <i>pcre_get_substring()</i>, as appropriate. <p>
+ Last updated: 09 September 2004
+<br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Native API</a></li>
+<li><a name='toc2' href='#sect2'>Pcre API Overview</a></li>
+<li><a name='toc3' href='#sect3'>Multithreading</a></li>
+<li><a name='toc4' href='#sect4'>Saving Precompiled Patterns for Later Use</a></li>
+<li><a name='toc5' href='#sect5'>Checking Build-time Options</a></li>
+<li><a name='toc6' href='#sect6'>Compiling a Pattern</a></li>
+<li><a name='toc7' href='#sect7'>Studying a Pattern</a></li>
+<li><a name='toc8' href='#sect8'>Locale Support</a></li>
+<li><a name='toc9' href='#sect9'>Information About a Pattern</a></li>
+<li><a name='toc10' href='#sect10'>Obsolete Info Function</a></li>
+<li><a name='toc11' href='#sect11'>Matching a Pattern</a></li>
+<ul>
+<li><a name='toc12' href='#sect12'>Extra data for pcre_exec()</a></li>
+<li><a name='toc13' href='#sect13'>Option bits for pcre_exec()</a></li>
+<li><a name='toc14' href='#sect14'>The string to be matched by pcre_exec()</a></li>
+<li><a name='toc15' href='#sect15'>How pcre_exec() returns captured substrings</a></li>
+<li><a name='toc16' href='#sect16'>Return values from pcre_exec()</a></li>
+</ul>
+<li><a name='toc17' href='#sect17'>Extracting Captured Substrings by Number</a></li>
+<li><a name='toc18' href='#sect18'>Extracting Captured Substrings by Name</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcrebuild.3.html b/spamfilter/Utilities/PCRE/man/html/pcrebuild.3.html
new file mode 100644
index 0000000..950a1f0
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcrebuild.3.html
@@ -0,0 +1,167 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Build-time Options</a></h2>
+ <p>
+This
+document describes the optional features of PCRE that can be selected when
+the library is compiled. They are all selected, or deselected, by providing
+options to the <b>configure</b> script that is run before the <b>make</b> command. The
+complete list of options for <b>configure</b> (which includes the standard ones
+such as the selection of the installation directory) can be obtained by
+running <p>
+ ./configure --help<br>
+ <p>
+The following sections describe certain options whose names begin with
+--enable or --disable. These settings specify changes to the defaults for the
+<b>configure</b> command. Because of the way that <b>configure</b> works, --enable and --disable
+always come in pairs, so the complementary option always exists as well,
+but as it specifies the default, it is not described.
+<h2><a name='sect2' href='#toc2'>Utf-8 Support</a></h2>
+ <p>
+To build
+PCRE with support for UTF-8 character strings, add <p>
+ --enable-utf8<br>
+ <p>
+to the <b>configure</b> command. Of itself, this does not make PCRE treat strings
+as UTF-8. As well as compiling PCRE with this option, you also have
+to set the PCRE_UTF8 option when you call the <b>pcre_compile()</b> function.
+
+<h2><a name='sect3' href='#toc3'>Unicode Character Property Support</a></h2>
+ <p>
+UTF-8 support allows PCRE to process
+character values greater than 255 in the strings that it handles. On its
+own, however, it does not provide any facilities for accessing the properties
+of such characters. If you want to be able to use the pattern escapes \P,
+\p, and \X, which refer to Unicode character properties, you must add <p>
+ --enable-unicode-properties<br>
+ <p>
+to the <b>configure</b> command. This implies UTF-8 support, even if you have not
+explicitly requested it. <p>
+Including Unicode property support adds around
+90K of tables to the PCRE library, approximately doubling its size. Only
+the general category properties such as <i>Lu</i> and <i>Nd</i> are supported. Details
+are given in the <b>pcrepattern</b> documentation.
+<h2><a name='sect4' href='#toc4'>Code Value of Newline</a></h2>
+ <p>
+By
+default, PCRE treats character 10 (linefeed) as the newline character. This
+is the normal newline character on Unix-like systems. You can compile PCRE
+to use character 13 (carriage return) instead by adding <p>
+ --enable-newline-is-cr<br>
+ <p>
+to the <b>configure</b> command. For completeness there is also a --enable-newline-is-lf
+option, which explicitly specifies linefeed as the newline character.
+<h2><a name='sect5' href='#toc5'>Building
+Shared and Static Libraries</a></h2>
+ <p>
+The PCRE building process uses <b>libtool</b> to build
+both shared and static Unix libraries by default. You can suppress one of
+these by adding one of <p>
+ --disable-shared<br>
+ --disable-static<br>
+ <p>
+to the <b>configure</b> command, as required.
+<h2><a name='sect6' href='#toc6'>Posix Malloc Usage</a></h2>
+ <p>
+When PCRE is
+called through the POSIX interface (see the <b>pcreposix</b> documentation),
+additional working storage is required for holding the pointers to capturing
+substrings, because PCRE requires three integers per substring, whereas
+the POSIX interface provides only two. If the number of expected substrings
+is small, the wrapper function uses space on the stack, because this is
+faster than using <b>malloc()</b> for each call. The default threshold above which
+the stack is no longer used is 10; it can be changed by adding a setting
+such as <p>
+ --with-posix-malloc-threshold=20<br>
+ <p>
+to the <b>configure</b> command.
+<h2><a name='sect7' href='#toc7'>Limiting Pcre Resource Usage</a></h2>
+ <p>
+Internally, PCRE
+has a function called <b>match()</b>, which it calls repeatedly (possibly recursively)
+when matching a pattern. By controlling the maximum number of times this
+function may be called during a single matching operation, a limit can
+be placed on the resources used by a single call to <b>pcre_exec()</b>. The limit
+can be changed at run time, as described in the <b>pcreapi</b> documentation.
+The default is 10 million, but this can be changed by adding a setting
+such as <p>
+ --with-match-limit=500000<br>
+ <p>
+to the <b>configure</b> command.
+<h2><a name='sect8' href='#toc8'>Handling Very Large Patterns</a></h2>
+ <p>
+Within a compiled
+pattern, offset values are used to point from one part to another (for
+example, from an opening parenthesis to an alternation metacharacter). By
+default, two-byte values are used for these offsets, leading to a maximum
+size for a compiled pattern of around 64K. This is sufficient to handle
+all but the most gigantic patterns. Nevertheless, some people do want to
+process enormous patterns, so it is possible to compile PCRE to use three-byte
+or four-byte offsets by adding a setting such as <p>
+ --with-link-size=3<br>
+ <p>
+to the <b>configure</b> command. The value given must be 2, 3, or 4. Using longer
+offsets slows down the operation of PCRE because it has to load additional
+bytes when handling them. <p>
+If you build PCRE with an increased link size,
+test 2 (and test 5 if you are using UTF-8) will fail. Part of the output
+of these tests is a representation of the compiled pattern, and this changes
+with the link size.
+<h2><a name='sect9' href='#toc9'>Avoiding Excessive Stack Usage</a></h2>
+ <p>
+PCRE implements backtracking
+while matching by making recursive calls to an internal function called
+<b>match()</b>. In environments where the size of the stack is limited, this can
+severely limit PCRE&rsquo;s operation. (The Unix environment does not usually suffer
+from this problem.) An alternative approach that uses memory from the heap
+to remember data, instead of using recursive function calls, has been implemented
+to work round this problem. If you want to build a version of PCRE that
+works this way, add <p>
+ --disable-stack-for-recursion<br>
+ <p>
+to the <b>configure</b> command. With this configuration, PCRE will use the <b>pcre_stack_malloc</b>
+and <b>pcre_stack_free</b> variables to call memory management functions. Separate
+functions are provided because the usage is very predictable: the block
+sizes requested are always the same, and the blocks are always freed in
+reverse order. A calling program might be able to implement optimized functions
+that perform better than the standard <b>malloc()</b> and <b>free()</b> functions. PCRE
+runs noticeably more slowly when built in this way.
+<h2><a name='sect10' href='#toc10'>Using Ebcdic Code</a></h2>
+ <p>
+PCRE
+assumes by default that it will run in an environment where the character
+code is ASCII (or Unicode, which is a superset of ASCII). PCRE can, however,
+be compiled to run in an EBCDIC environment by adding <p>
+ --enable-ebcdic<br>
+ <p>
+to the <b>configure</b> command. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Build-time Options</a></li>
+<li><a name='toc2' href='#sect2'>Utf-8 Support</a></li>
+<li><a name='toc3' href='#sect3'>Unicode Character Property Support</a></li>
+<li><a name='toc4' href='#sect4'>Code Value of Newline</a></li>
+<li><a name='toc5' href='#sect5'>Building Shared and Static Libraries</a></li>
+<li><a name='toc6' href='#sect6'>Posix Malloc Usage</a></li>
+<li><a name='toc7' href='#sect7'>Limiting Pcre Resource Usage</a></li>
+<li><a name='toc8' href='#sect8'>Handling Very Large Patterns</a></li>
+<li><a name='toc9' href='#sect9'>Avoiding Excessive Stack Usage</a></li>
+<li><a name='toc10' href='#sect10'>Using Ebcdic Code</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcrecallout.3.html b/spamfilter/Utilities/PCRE/man/html/pcrecallout.3.html
new file mode 100644
index 0000000..83e61b2
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcrecallout.3.html
@@ -0,0 +1,148 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Callouts</a></h2>
+ <p>
+<b>int (*pcre_callout)(pcre_callout_block
+*);</b> <p>
+PCRE provides a feature called "callout", which is a means of temporarily
+passing control to the caller of PCRE in the middle of pattern matching.
+The caller of PCRE provides an external function by putting its entry point
+in the global variable <i>pcre_callout</i>. By default, this variable contains
+NULL, which disables all calling out. <p>
+Within a regular expression, (?C)
+indicates the points at which the external function is to be called. Different
+callout points can be identified by putting a number less than 256 after
+the letter C. The default value is zero. For example, this pattern has two
+callout points: <p>
+ (?C1)deabc(?C2)def<br>
+ <p>
+If the PCRE_AUTO_CALLOUT option bit is set when <b>pcre_compile()</b> is called,
+PCRE automatically inserts callouts, all with number 255, before each item
+in the pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern
+<p>
+ A(\d{2}|--)<br>
+ <p>
+it is processed as if it were <p>
+(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
+<p>
+Notice that there is a callout before and after each parenthesis and alternation
+bar. Automatic callouts can be used for tracking the progress of pattern
+matching. The <b>pcretest</b> command has an option that sets automatic callouts;
+when it is used, the output indicates how the pattern is matched. This is
+useful information when you are trying to optimize the performance of a
+particular pattern.
+<h2><a name='sect2' href='#toc2'>Missing Callouts</a></h2>
+ <p>
+You should be aware that, because
+of optimizations in the way PCRE matches patterns, callouts sometimes do
+not happen. For example, if the pattern is <p>
+ ab(?C4)cd<br>
+ <p>
+PCRE knows that any matching string must contain the letter "d". If the
+subject string is "abyz", the lack of "d" means that matching doesn&rsquo;t ever
+start, and the callout is never reached. However, with "abyd", though the
+result is still no match, the callout is obeyed.
+<h2><a name='sect3' href='#toc3'>the Callout Interface</a></h2>
+
+<p>
+During matching, when PCRE reaches a callout point, the external function
+defined by <i>pcre_callout</i> is called (if it is set). The only argument is a
+pointer to a <b>pcre_callout</b> block. This structure contains the following fields:
+<p>
+ int <i>version</i>;<br>
+ int <i>callout_number</i>;<br>
+ int *<i>offset_vector</i>;<br>
+ const char *<i>subject</i>;<br>
+ int <i>subject_length</i>;<br>
+ int <i>start_match</i>;<br>
+ int <i>current_position</i>;<br>
+ int <i>capture_top</i>;<br>
+ int <i>capture_last</i>;<br>
+ void *<i>callout_data</i>;<br>
+ int <i>pattern_position</i>;<br>
+ int <i>next_item_length</i>;<br>
+ <p>
+The <i>version</i> field is an integer containing the version number of the block
+format. The initial version was 0; the current version is 1. The version
+number will change again in future if additional fields are added, but
+the intention is never to remove any of the existing fields. <p>
+The <i>callout_number</i>
+field contains the number of the callout, as compiled into the pattern
+(that is, the number after ?C for manual callouts, and 255 for automatically
+generated callouts). <p>
+The <i>offset_vector</i> field is a pointer to the vector
+of offsets that was passed by the caller to <b>pcre_exec()</b>. The contents can
+be inspected in order to extract substrings that have been matched so far,
+in the same way as for extracting substrings after a match has completed.
+<p>
+The <i>subject</i> and <i>subject_length</i> fields contain copies of the values that
+were passed to <b>pcre_exec()</b>. <p>
+The <i>start_match</i> field contains the offset within
+the subject at which the current match attempt started. If the pattern is
+not anchored, the callout function may be called several times from the
+same point in the pattern for different starting points in the subject.
+<p>
+The <i>current_position</i> field contains the offset within the subject of the
+current match pointer. <p>
+The <i>capture_top</i> field contains one more than the
+number of the highest numbered captured substring so far. If no substrings
+have been captured, the value of <i>capture_top</i> is one. <p>
+The <i>capture_last</i> field
+contains the number of the most recently captured substring. If no substrings
+have been captured, its value is -1. <p>
+The <i>callout_data</i> field contains a value
+that is passed to <b>pcre_exec()</b> by the caller specifically so that it can
+be passed back in callouts. It is passed in the <i>callout_data</i> field of the
+<b>pcre_extra</b> data structure. If no such data was passed, the value of <i>callout_data</i>
+in a <b>pcre_callout</b> block is NULL. There is a description of the <b>pcre_extra</b>
+structure in the <b>pcreapi</b> documentation. <p>
+The <i>pattern_position</i> field is
+present from version 1 of the <i>pcre_callout</i> structure. It contains the offset
+to the next item to be matched in the pattern string. <p>
+The <i>next_item_length</i>
+field is present from version 1 of the <i>pcre_callout</i> structure. It contains
+the length of the next item to be matched in the pattern string. When the
+callout immediately precedes an alternation bar, a closing parenthesis,
+or the end of the pattern, the length is zero. When the callout precedes
+an opening parenthesis, the length is that of the entire subpattern. <p>
+The
+<i>pattern_position</i> and <i>next_item_length</i> fields are intended to help in distinguishing
+between different automatic callouts, which all have the same callout number.
+However, they are set for all callouts.
+<h2><a name='sect4' href='#toc4'>Return Values</a></h2>
+ <p>
+The external callout
+function returns an integer to PCRE. If the value is zero, matching proceeds
+as normal. If the value is greater than zero, matching fails at the current
+point, but backtracking to test other matching possibilities goes ahead,
+just as if a lookahead assertion had failed. If the value is less than zero,
+the match is abandoned, and <b>pcre_exec()</b> returns the negative value. <p>
+Negative
+values should normally be chosen from the set of PCRE_ERROR_xxx values.
+In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure.
+The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
+it will never be used by PCRE itself. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Callouts</a></li>
+<li><a name='toc2' href='#sect2'>Missing Callouts</a></li>
+<li><a name='toc3' href='#sect3'>the Callout Interface</a></li>
+<li><a name='toc4' href='#sect4'>Return Values</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcrecompat.3.html b/spamfilter/Utilities/PCRE/man/html/pcrecompat.3.html
new file mode 100644
index 0000000..af67000
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcrecompat.3.html
@@ -0,0 +1,115 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Differences Between Pcre and
+Perl</a></h2>
+ <p>
+This document describes the differences in the ways that PCRE and
+Perl handle regular expressions. The differences described here are with
+respect to Perl 5.8. <p>
+1. PCRE does not have full UTF-8 support. Details of what
+it does have are given in the section on UTF-8 support in the main <b>pcre</b>
+ page. <p>
+2. PCRE does not allow repeat quantifiers on lookahead assertions.
+Perl permits them, but they do not mean what you might think. For example,
+(?!a){3} does not assert that the next three characters are not "a". It
+just asserts that the next character is not "a" three times. <p>
+3. Capturing
+subpatterns that occur inside negative lookahead assertions are counted,
+but their entries in the offsets vector are never set. Perl sets its numerical
+variables from any such patterns that are matched before the assertion
+fails to match something (thereby succeeding), but only if the negative
+lookahead assertion contains just one branch. <p>
+4. Though binary zero characters
+are supported in the subject string, they are not allowed in a pattern
+string because it is passed as a normal C string, terminated by zero. The
+escape sequence \0 can be used in the pattern to represent a binary zero.
+<p>
+5. The following Perl escape sequences are not supported: \l, \u, \L, \U, and
+\N. In fact these are implemented by Perl&rsquo;s general string-handling and are
+not part of its pattern matching engine. If any of these are encountered
+by PCRE, an error is generated. <p>
+6. The Perl escape sequences \p, \P, and \X
+are supported only if PCRE is built with Unicode character property support.
+The properties that can be tested with \p and \P are limited to the general
+category properties such as Lu and Nd. <p>
+7. PCRE does support the \Q...\E escape
+for quoting substrings. Characters in between are treated as literals. This
+is slightly different from Perl in that $ and @ are also handled as literals
+inside the quotes. In Perl, they cause variable interpolation (but of course
+PCRE does not have variables). Note the following examples: <p>
+ Pattern
+ PCRE matches Perl matches<br>
+ <p>
+ \Qabc$xyz\E abc$xyz abc followed by the<br>
+ contents of $xyz<br>
+ \Qabc\$xyz\E abc\$xyz abc\$xyz<br>
+ \Qabc\E\$\Qxyz\E abc$xyz abc$xyz<br>
+ <p>
+The \Q...\E sequence is recognized both inside and outside character classes.
+<p>
+8. Fairly obviously, PCRE does not support the (?{code}) and (?p{code})
+constructions. However, there is support for recursive patterns using the
+non-Perl items (?R), (?number), and (?P&gt;name). Also, the PCRE "callout" feature
+allows an external function to be called during pattern matching. See the
+ <b>pcrecallout</b> documentation for details. <p>
+9. There are some differences that
+are concerned with the settings of captured strings when part of a pattern
+is repeated. For example, matching "aba" against the pattern /^(a(b)?)+$/
+in Perl leaves $2 unset, but in PCRE it is set to "b". <p>
+10. PCRE provides
+some extensions to the Perl regular expression facilities: <p>
+(a) Although
+lookbehind assertions must match fixed length strings, each alternative
+branch of a lookbehind assertion can match a different length of string.
+Perl requires them all to have the same length. <p>
+(b) If PCRE_DOLLAR_ENDONLY
+is set and PCRE_MULTILINE is not set, the $ meta-character matches only
+at the very end of the string. <p>
+(c) If PCRE_EXTRA is set, a backslash followed
+by a letter with no special meaning is faulted. <p>
+(d) If PCRE_UNGREEDY is
+set, the greediness of the repetition quantifiers is inverted, that is,
+by default they are not greedy, but if followed by a question mark they
+are. <p>
+(e) PCRE_ANCHORED can be used at matching time to force a pattern to
+be tried only at the first matching position in the subject string. <p>
+(f)
+The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, and PCRE_NO_AUTO_CAPTURE options
+for <b>pcre_exec()</b> have no Perl equivalents. <p>
+(g) The (?R), (?number), and (?P&gt;name)
+constructs allow for recursive pattern matching (Perl can do this using
+the (?p{code}) construct, which PCRE cannot support.) <p>
+(h) PCRE supports
+named capturing substrings, using the Python syntax. <p>
+(i) PCRE supports the
+possessive quantifier "++" syntax, taken from Sun&rsquo;s Java package. <p>
+(j) The
+(R) condition, for testing recursion, is a PCRE extension. <p>
+(k) The callout
+facility is PCRE-specific. <p>
+(l) The partial matching facility is PCRE-specific.
+<p>
+(m) Patterns compiled by PCRE can be saved and re-used at a later time,
+even on different hosts that have the other endianness. <p>
+ Last updated: 09
+September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Differences Between Pcre and Perl</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcregrep.1.html b/spamfilter/Utilities/PCRE/man/html/pcregrep.1.html
new file mode 100644
index 0000000..726e8b7
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcregrep.1.html
@@ -0,0 +1,147 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCREGREP(1) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+pcregrep - a grep with Perl-compatible regular expressions.
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+<b>pcregrep
+[-Vcfhilnrsuvx] [long options] [pattern] [file1 file2 ...]</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+<b>pcregrep</b>
+searches files for character patterns, in the same way as other grep commands
+do, but it uses the PCRE regular expression library to support patterns
+that are compatible with the regular expressions of Perl 5. See <b>pcrepattern</b>
+ for a full description of syntax and semantics of the regular expressions
+that PCRE supports. <p>
+A pattern must be specified on the command line unless
+the <b>-f</b> option is used (see below). <p>
+If no files are specified, <b>pcregrep</b> reads
+the standard input. By default, each line that matches the pattern is copied
+to the standard output, and if there is more than one file, the file name
+is printed before each line of output. However, there are options that can
+change how <b>pcregrep</b> behaves. <p>
+Lines are limited to BUFSIZ characters. BUFSIZ
+is defined in <b>&lt;stdio.h&gt;</b>. The newline character is removed from the end of each
+line before it is matched against the pattern.
+<h2><a name='sect3' href='#toc3'>Options</a></h2>
+ <p>
+
+<dl>
+
+<dt><b>-V</b> </dt>
+<dd>Write the version
+number of the PCRE library being used to the standard error stream. </dd>
+
+<dt><b>-c</b> </dt>
+<dd>Do
+not print individual lines; instead just print a count of the number of
+lines that would otherwise have been printed. If several files are given,
+a count is printed for each of them. </dd>
+
+<dt><b>-f</b><i>filename</i> </dt>
+<dd>Read a number of patterns
+from the file, one per line, and match all of them against each line of
+input. A line is output if any of the patterns match it. When <b>-f</b> is used,
+no pattern is taken from the command line; all arguments are treated as
+file names. There is a maximum of 100 patterns. Trailing white space is removed,
+and blank lines are ignored. An empty file contains no patterns and therefore
+matches nothing. </dd>
+
+<dt><b>-h</b> </dt>
+<dd>Suppress printing of filenames when searching multiple
+files. </dd>
+
+<dt><b>-i</b> </dt>
+<dd>Ignore upper/lower case distinctions during comparisons. </dd>
+
+<dt><b>-l</b> </dt>
+<dd>Instead
+of printing lines from the files, just print the names of the files containing
+lines that would have been printed. Each file name is printed once, on a
+separate line. </dd>
+
+<dt><b>-n</b> </dt>
+<dd>Precede each line by its line number in the file. </dd>
+
+<dt><b>-r</b> </dt>
+<dd>If any
+file is a directory, recursively scan the files it contains. Without <b>-r</b> a
+directory is scanned as a normal file. </dd>
+
+<dt><b>-s</b> </dt>
+<dd>Work silently, that is, display
+nothing except error messages. The exit status indicates whether any matches
+were found. </dd>
+
+<dt><b>-u</b> </dt>
+<dd>Operate in UTF-8 mode. This option is available only if PCRE
+has been compiled with UTF-8 support. Both the pattern and each subject line
+must be valid strings of UTF-8 characters. </dd>
+
+<dt><b>-v</b> </dt>
+<dd>Invert the sense of the match,
+so that lines which do <i>not</i> match the pattern are now the ones that are
+found. </dd>
+
+<dt><b>-x</b> </dt>
+<dd>Force the pattern to be anchored (it must start matching at the
+beginning of the line) and in addition, require it to match the entire
+line. This is equivalent to having ^ and $ characters at the start and end
+of each alternative branch in the regular expression. </dd>
+</dl>
+
+<h2><a name='sect4' href='#toc4'>Long Options</a></h2>
+ <p>
+Long
+forms of all the options are available, as in GNU grep. They are shown in
+the following table: <p>
+ -c --count<br>
+ -h --no-filename<br>
+ -i --ignore-case<br>
+ -l --files-with-matches<br>
+ -n --line-number<br>
+ -r --recursive<br>
+ -s --no-messages<br>
+ -u --utf-8<br>
+ -V --version<br>
+ -v --invert-match<br>
+ -x --line-regex<br>
+ -x --line-regexp<br>
+ <p>
+In addition, --file=<i>filename</i> is equivalent to -f<i>filename</i>, and --help shows
+the list of options and then exits.
+<h2><a name='sect5' href='#toc5'>Diagnostics</a></h2>
+ <p>
+Exit status is 0 if any
+matches were found, 1 if no matches were found, and 2 for syntax errors
+or inaccessible files (even if matches were found).
+<h2><a name='sect6' href='#toc6'>Author</a></h2>
+ <p>
+Philip Hazel
+&lt;ph10@cam.ac.uk&gt; <br>
+University Computing Service <br>
+Cambridge CB2 3QG, England. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+<li><a name='toc3' href='#sect3'>Options</a></li>
+<li><a name='toc4' href='#sect4'>Long Options</a></li>
+<li><a name='toc5' href='#sect5'>Diagnostics</a></li>
+<li><a name='toc6' href='#sect6'>Author</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcrepattern.3.html b/spamfilter/Utilities/PCRE/man/html/pcrepattern.3.html
new file mode 100644
index 0000000..11bb198
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcrepattern.3.html
@@ -0,0 +1,1268 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Regular Expression Details</a></h2>
+
+<p>
+The syntax and semantics of the regular expressions supported by PCRE are
+described below. Regular expressions are also described in the Perl documentation
+and in a number of books, some of which have copious examples. Jeffrey Friedl&rsquo;s
+"Mastering Regular Expressions", published by O&rsquo;Reilly, covers regular expressions
+in great detail. This description of PCRE&rsquo;s regular expressions is intended
+as reference material. <p>
+The original operation of PCRE was on strings of
+one-byte characters. However, there is now also support for UTF-8 character
+strings. To use this, you must build PCRE to include UTF-8 support, and then
+call <b>pcre_compile()</b> with the PCRE_UTF8 option. How this affects pattern
+matching is mentioned in several places below. There is also a summary of
+UTF-8 features in the section on UTF-8 support in the main <b>pcre</b> page.
+<p>
+A regular expression is a pattern that is matched against a subject string
+from left to right. Most characters stand for themselves in a pattern, and
+match the corresponding characters in the subject. As a trivial example,
+the pattern <p>
+ The quick brown fox<br>
+ <p>
+matches a portion of a subject string that is identical to itself. The
+power of regular expressions comes from the ability to include alternatives
+and repetitions in the pattern. These are encoded in the pattern by the
+use of <i>metacharacters</i>, which do not stand for themselves but instead are
+interpreted in some special way. <p>
+There are two different sets of metacharacters:
+those that are recognized anywhere in the pattern except within square
+brackets, and those that are recognized in square brackets. Outside square
+brackets, the metacharacters are as follows: <p>
+ \ general escape character
+with several uses<br>
+ ^ assert start of string (or line, in multiline mode)<br>
+ $ assert end of string (or line, in multiline mode)<br>
+ . match any character except newline (by default)<br>
+ [ start character class definition<br>
+ | start of alternative branch<br>
+ ( start subpattern<br>
+ ) end subpattern<br>
+ ? extends the meaning of (<br>
+ also 0 or 1 quantifier<br>
+ also quantifier minimizer<br>
+ * 0 or more quantifier<br>
+ + 1 or more quantifier<br>
+ also "possessive quantifier"<br>
+ { start min/max quantifier<br>
+ <p>
+Part of a pattern that is in square brackets is called a "character class".
+In a character class the only metacharacters are: <p>
+ \ general escape
+character<br>
+ ^ negate the class, but only if the first character<br>
+ - indicates character range<br>
+ [ POSIX character class (only if followed by POSIX<br>
+ syntax)<br>
+ ] terminates the character class<br>
+ <p>
+The following sections describe the use of each of the metacharacters.
+
+<h2><a name='sect2' href='#toc2'>Backslash</a></h2>
+ <p>
+The backslash character has several uses. Firstly, if it is followed
+by a non-alphanumeric character, it takes away any special meaning that
+character may have. This use of backslash as an escape character applies
+both inside and outside character classes. <p>
+For example, if you want to match
+a * character, you write \* in the pattern. This escaping action applies
+whether or not the following character would otherwise be interpreted as
+a metacharacter, so it is always safe to precede a non-alphanumeric with
+backslash to specify that it stands for itself. In particular, if you want
+to match a backslash, you write \\. <p>
+If a pattern is compiled with the PCRE_EXTENDED
+option, whitespace in the pattern (other than in a character class) and
+characters between a # outside a character class and the next newline character
+are ignored. An escaping backslash can be used to include a whitespace or
+# character as part of the pattern. <p>
+If you want to remove the special meaning
+from a sequence of characters, you can do so by putting them between \Q
+and \E. This is different from Perl in that $ and @ are handled as literals
+in \Q...\E sequences in PCRE, whereas in Perl, $ and @ cause variable interpolation.
+Note the following examples: <p>
+ Pattern PCRE matches Perl matches<br>
+ <p>
+ \Qabc$xyz\E abc$xyz abc followed by the<br>
+ contents of $xyz<br>
+ \Qabc\$xyz\E abc\$xyz abc\$xyz<br>
+ \Qabc\E\$\Qxyz\E abc$xyz abc$xyz<br>
+ <p>
+The \Q...\E sequence is recognized both inside and outside character classes.
+
+<h3><a name='sect3' href='#toc3'>Non-printing characters</a></h3>
+ <p>
+A second use of backslash provides a way of encoding
+non-printing characters in patterns in a visible manner. There is no restriction
+on the appearance of non-printing characters, apart from the binary zero
+that terminates a pattern, but when a pattern is being prepared by text
+editing, it is usually easier to use one of the following escape sequences
+than the binary character it represents: <p>
+ \a alarm, that is, the
+BEL character (hex 07)<br>
+ \cx "control-x", where x is any character<br>
+ \e escape (hex 1B)<br>
+ \f formfeed (hex 0C)<br>
+ \n newline (hex 0A)<br>
+ \r carriage return (hex 0D)<br>
+ \t tab (hex 09)<br>
+ \ddd character with octal code ddd, or backreference<br>
+ \xhh character with hex code hh<br>
+ \x{hhh..} character with hex code hhh... (UTF-8 mode only)<br>
+ <p>
+The precise effect of \cx is as follows: if x is a lower case letter, it
+is converted to upper case. Then bit 6 of the character (hex 40) is inverted.
+Thus \cz becomes hex 1A, but \c{ becomes hex 3B, while \c; becomes hex 7B.
+<p>
+After \x, from zero to two hexadecimal digits are read (letters can be in
+upper or lower case). In UTF-8 mode, any number of hexadecimal digits may
+appear between \x{ and }, but the value of the character code must be less
+than 2**31 (that is, the maximum hexadecimal value is 7FFFFFFF). If characters
+other than hexadecimal digits appear between \x{ and }, or if there is no
+terminating }, this form of escape is not recognized. Instead, the initial
+\x will be interpreted as a basic hexadecimal escape, with no following
+digits, giving a character whose value is zero. <p>
+Characters whose value is
+less than 256 can be defined by either of the two syntaxes for \x when PCRE
+is in UTF-8 mode. There is no difference in the way they are handled. For
+example, \xdc is exactly the same as \x{dc}. <p>
+After \0 up to two further octal
+digits are read. In both cases, if there are fewer than two digits, just
+those that are present are used. Thus the sequence \0\x\07 specifies two binary
+zeros followed by a BEL character (code value 7). Make sure you supply two
+digits after the initial zero if the pattern character that follows is
+itself an octal digit. <p>
+The handling of a backslash followed by a digit other
+than 0 is complicated. Outside a character class, PCRE reads it and any
+following digits as a decimal number. If the number is less than 10, or
+if there have been at least that many previous capturing left parentheses
+in the expression, the entire sequence is taken as a <i>back reference</i>. A description
+of how this works is given later, following the discussion of parenthesized
+subpatterns. <p>
+Inside a character class, or if the decimal number is greater
+than 9 and there have not been that many capturing subpatterns, PCRE re-reads
+up to three octal digits following the backslash, and generates a single
+byte from the least significant 8 bits of the value. Any subsequent digits
+stand for themselves. For example: <p>
+ \040 is another way of writing a space<br>
+ \40 is the same, provided there are fewer than 40<br>
+ previous capturing subpatterns<br>
+ \7 is always a back reference<br>
+ \11 might be a back reference, or another way of<br>
+ writing a tab<br>
+ \011 is always a tab<br>
+ \0113 is a tab followed by the character "3"<br>
+ \113 might be a back reference, otherwise the<br>
+ character with octal code 113<br>
+ \377 might be a back reference, otherwise<br>
+ the byte consisting entirely of 1 bits<br>
+ \81 is either a back reference, or a binary zero<br>
+ followed by the two characters "8" and "1"<br>
+ <p>
+Note that octal values of 100 or greater must not be introduced by a leading
+zero, because no more than three octal digits are ever read. <p>
+All the sequences
+that define a single byte value or a single UTF-8 character (in UTF-8 mode)
+can be used both inside and outside character classes. In addition, inside
+a character class, the sequence \b is interpreted as the backspace character
+(hex 08), and the sequence \X is interpreted as the character "X". Outside
+a character class, these sequences have different meanings (see below).
+
+<h3><a name='sect4' href='#toc4'>Generic character types</a></h3>
+ <p>
+The third use of backslash is for specifying
+generic character types. The following are always recognized: <p>
+ \d any
+decimal digit<br>
+ \D any character that is not a decimal digit<br>
+ \s any whitespace character<br>
+ \S any character that is not a whitespace character<br>
+ \w any "word" character<br>
+ \W any "non-word" character<br>
+ <p>
+Each pair of escape sequences partitions the complete set of characters
+into two disjoint sets. Any given character matches one, and only one, of
+each pair. <p>
+These character type sequences can appear both inside and outside
+character classes. They each match one character of the appropriate type.
+If the current matching point is at the end of the subject string, all
+of them fail, since there is no character to match. <p>
+For compatibility with
+Perl, \s does not match the VT character (code 11). This makes it different
+from the POSIX "space" class. The \s characters are HT (9),
+LF (10),
+FF (12), CR (13), and space (32). <p>
+A "word" character is an underscore or
+any character less than 256 that is a letter or digit. The definition of
+letters and digits is controlled by PCRE&rsquo;s low-valued character tables, and
+may vary if locale-specific matching is taking place (see "Locale support"
+ in the <b>pcreapi</b> page). For example, in the "fr_FR" (French) locale, some
+character codes greater than 128 are used for accented letters, and these
+are matched by \w. <p>
+In UTF-8 mode, characters with values greater than 128
+never match \d, \s, or \w, and always match \D, \S, and \W. This is true even
+when Unicode character property support is available.
+<h3><a name='sect5' href='#toc5'>Unicode character
+properties</a></h3>
+ <p>
+When PCRE is built with Unicode character property support,
+three additional escape sequences to match generic character types are
+available when UTF-8 mode is selected. They are: <p>
+ \p{<i>xx</i>} a character with
+the <i>xx</i> property<br>
+ \P{<i>xx</i>} a character without the <i>xx</i> property<br>
+ \X an extended Unicode sequence<br>
+ <p>
+The property names represented by <i>xx</i> above are limited to the Unicode
+general category properties. Each character has exactly one such property,
+specified by a two-letter abbreviation. For compatibility with Perl, negation
+can be specified by including a circumflex between the opening brace and
+the property name. For example, \p{^Lu} is the same as \P{Lu}. <p>
+If only one letter
+is specified with \p or \P, it includes all the properties that start with
+that letter. In this case, in the absence of negation, the curly brackets
+in the escape sequence are optional; these two examples have the same effect:
+<p>
+ \p{L}<br>
+ \pL<br>
+ <p>
+The following property codes are supported: <p>
+ C Other<br>
+ Cc Control<br>
+ Cf Format<br>
+ Cn Unassigned<br>
+ Co Private use<br>
+ Cs Surrogate<br>
+ <p>
+ L Letter<br>
+ Ll Lower case letter<br>
+ Lm Modifier letter<br>
+ Lo Other letter<br>
+ Lt Title case letter<br>
+ Lu Upper case letter<br>
+ <p>
+ M Mark<br>
+ Mc Spacing mark<br>
+ Me Enclosing mark<br>
+ Mn Non-spacing mark<br>
+ <p>
+ N Number<br>
+ Nd Decimal number<br>
+ Nl Letter number<br>
+ No Other number<br>
+ <p>
+ P Punctuation<br>
+ Pc Connector punctuation<br>
+ Pd Dash punctuation<br>
+ Pe Close punctuation<br>
+ Pf Final punctuation<br>
+ Pi Initial punctuation<br>
+ Po Other punctuation<br>
+ Ps Open punctuation<br>
+ <p>
+ S Symbol<br>
+ Sc Currency symbol<br>
+ Sk Modifier symbol<br>
+ Sm Mathematical symbol<br>
+ So Other symbol<br>
+ <p>
+ Z Separator<br>
+ Zl Line separator<br>
+ Zp Paragraph separator<br>
+ Zs Space separator<br>
+ <p>
+Extended properties such as "Greek" or "InMusicalSymbols" are not supported
+by PCRE. <p>
+Specifying caseless matching does not affect these escape sequences.
+For example, \p{Lu} always matches only upper case letters. <p>
+The \X escape
+matches any number of Unicode characters that form an extended Unicode
+sequence. \X is equivalent to <p>
+ (?&gt;\PM\pM*)<br>
+ <p>
+That is, it matches a character without the "mark" property, followed
+by zero or more characters with the "mark" property, and treats the sequence
+as an atomic group (see below). Characters with the "mark" property are
+typically accents that affect the preceding character. <p>
+Matching characters
+by Unicode property is not fast, because PCRE has to search a structure
+that contains data for over fifteen thousand characters. That is why the
+traditional escape sequences such as \d and \w do not use Unicode properties
+in PCRE.
+<h3><a name='sect6' href='#toc6'>Simple assertions</a></h3>
+ <p>
+The fourth use of backslash is for certain
+simple assertions. An assertion specifies a condition that has to be met
+at a particular point in a match, without consuming any characters from
+the subject string. The use of subpatterns for more complicated assertions
+is described below. The backslashed assertions are: <p>
+ \b matches at
+a word boundary<br>
+ \B matches when not at a word boundary<br>
+ \A matches at start of subject<br>
+ \Z matches at end of subject or before newline at end<br>
+ \z matches at end of subject<br>
+ \G matches at first matching position in subject<br>
+ <p>
+These assertions may not appear in character classes (but note that \b
+has a different meaning, namely the backspace character, inside a character
+class). <p>
+A word boundary is a position in the subject string where the current
+character and the previous character do not both match \w or \W (i.e. one matches
+\w and the other matches \W), or the start or end of the string if the first
+or last character matches \w, respectively. <p>
+The \A, \Z, and \z assertions differ
+from the traditional circumflex and dollar (described in the next section)
+in that they only ever match at the very start and end of the subject string,
+whatever options are set. Thus, they are independent of multiline mode. These
+three assertions are not affected by the PCRE_NOTBOL or PCRE_NOTEOL options,
+which affect only the behaviour of the circumflex and dollar metacharacters.
+However, if the <i>startoffset</i> argument of <b>pcre_exec()</b> is non-zero, indicating
+that matching is to start at a point other than the beginning of the subject,
+\A can never match. The difference between \Z and \z is that \Z matches before
+a newline that is the last character of the string as well as at the end
+of the string, whereas \z matches only at the end. <p>
+The \G assertion is true
+only when the current matching position is at the start point of the match,
+as specified by the <i>startoffset</i> argument of <b>pcre_exec()</b>. It differs from
+\A when the value of <i>startoffset</i> is non-zero. By calling <b>pcre_exec()</b> multiple
+times with appropriate arguments, you can mimic Perl&rsquo;s /g option, and it
+is in this kind of implementation where \G can be useful. <p>
+Note, however,
+that PCRE&rsquo;s interpretation of \G, as the start of the current match, is subtly
+different from Perl&rsquo;s, which defines it as the end of the previous match.
+In Perl, these can be different when the previously matched string was
+empty. Because PCRE does just one match at a time, it cannot reproduce this
+behaviour. <p>
+If all the alternatives of a pattern begin with \G, the expression
+is anchored to the starting match position, and the "anchored" flag is
+set in the compiled regular expression.
+<h2><a name='sect7' href='#toc7'>Circumflex and Dollar</a></h2>
+ <p>
+Outside
+a character class, in the default matching mode, the circumflex character
+is an assertion that is true only if the current matching point is at the
+start of the subject string. If the <i>startoffset</i> argument of <b>pcre_exec()</b>
+is non-zero, circumflex can never match if the PCRE_MULTILINE option is
+unset. Inside a character class, circumflex has an entirely different meaning
+ (see below). <p>
+Circumflex need not be the first character of the pattern
+if a number of alternatives are involved, but it should be the first thing
+in each alternative in which it appears if the pattern is ever to match
+that branch. If all possible alternatives start with a circumflex, that
+is, if the pattern is constrained to match only at the start of the subject,
+it is said to be an "anchored" pattern. (There are also other constructs
+that can cause a pattern to be anchored.) <p>
+A dollar character is an assertion
+that is true only if the current matching point is at the end of the subject
+string, or immediately before a newline character that is the last character
+in the string (by default). Dollar need not be the last character of the
+pattern if a number of alternatives are involved, but it should be the
+last item in any branch in which it appears. Dollar has no special meaning
+in a character class. <p>
+The meaning of dollar can be changed so that it matches
+only at the very end of the string, by setting the PCRE_DOLLAR_ENDONLY
+option at compile time. This does not affect the \Z assertion. <p>
+The meanings
+of the circumflex and dollar characters are changed if the PCRE_MULTILINE
+option is set. When this is the case, they match immediately after and immediately
+before an internal newline character, respectively, in addition to matching
+at the start and end of the subject string. For example, the pattern /^abc$/
+matches the subject string "def\nabc" (where \n represents a newline character)
+in multiline mode, but not otherwise. Consequently, patterns that are anchored
+in single line mode because all branches start with ^ are not anchored in
+multiline mode, and a match for circumflex is possible when the <i>startoffset</i>
+argument of <b>pcre_exec()</b> is non-zero. The PCRE_DOLLAR_ENDONLY option is ignored
+if PCRE_MULTILINE is set. <p>
+Note that the sequences \A, \Z, and \z can be used
+to match the start and end of the subject in both modes, and if all branches
+of a pattern start with \A it is always anchored, whether PCRE_MULTILINE
+is set or not.
+<h2><a name='sect8' href='#toc8'>Full Stop (period, Dot)</a></h2>
+ <p>
+Outside a character class, a dot
+in the pattern matches any one character in the subject, including a non-printing
+character, but not (by default) newline. In UTF-8 mode, a dot matches any
+UTF-8 character, which might be more than one byte long, except (by default)
+newline. If the PCRE_DOTALL option is set, dots match newlines as well. The
+handling of dot is entirely independent of the handling of circumflex and
+dollar, the only relationship being that they both involve newline characters.
+Dot has no special meaning in a character class.
+<h2><a name='sect9' href='#toc9'>Matching a Single Byte</a></h2>
+
+<p>
+Outside a character class, the escape sequence \C matches any one byte,
+both in and out of UTF-8 mode. Unlike a dot, it can match a newline. The feature
+is provided in Perl in order to match individual bytes in UTF-8 mode. Because
+it breaks up UTF-8 characters into individual bytes, what remains in the
+string may be a malformed UTF-8 string. For this reason, the \C escape sequence
+is best avoided. <p>
+PCRE does not allow \C to appear in lookbehind assertions
+ (described below), because in UTF-8 mode this would make it impossible
+to calculate the length of the lookbehind.
+<h2><a name='sect10' href='#toc10'>Square Brackets and Character
+Classes</a></h2>
+ <p>
+An opening square bracket introduces a character class, terminated
+by a closing square bracket. A closing square bracket on its own is not
+special. If a closing square bracket is required as a member of the class,
+it should be the first data character in the class (after an initial circumflex,
+if present) or escaped with a backslash. <p>
+A character class matches a single
+character in the subject. In UTF-8 mode, the character may occupy more than
+one byte. A matched character must be in the set of characters defined by
+the class, unless the first character in the class definition is a circumflex,
+in which case the subject character must not be in the set defined by the
+class. If a circumflex is actually required as a member of the class, ensure
+it is not the first character, or escape it with a backslash. <p>
+For example,
+the character class [aeiou] matches any lower case vowel, while [^aeiou]
+matches any character that is not a lower case vowel. Note that a circumflex
+is just a convenient notation for specifying the characters that are in
+the class by enumerating those that are not. A class that starts with a
+circumflex is not an assertion: it still consumes a character from the
+subject string, and therefore it fails if the current pointer is at the
+end of the string. <p>
+In UTF-8 mode, characters with values greater than 255
+can be included in a class as a literal string of bytes, or by using the
+\x{ escaping mechanism. <p>
+When caseless matching is set, any letters in a class
+represent both their upper case and lower case versions, so for example,
+a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does
+not match "A", whereas a caseful version would. When running in UTF-8 mode,
+PCRE supports the concept of case for characters with values greater than
+128 only when it is compiled with Unicode property support. <p>
+The newline
+character is never treated in any special way in character classes, whatever
+the setting of the PCRE_DOTALL or PCRE_MULTILINE options is. A class such
+as [^a] will always match a newline. <p>
+The minus (hyphen) character can be
+used to specify a range of characters in a character class. For example,
+[d-m] matches any letter between d and m, inclusive. If a minus character
+is required in a class, it must be escaped with a backslash or appear in
+a position where it cannot be interpreted as indicating a range, typically
+as the first or last character in the class. <p>
+It is not possible to have
+the literal character "]" as the end character of a range. A pattern such
+as [W-]46] is interpreted as a class of two characters ("W" and "-") followed
+by a literal string "46]", so it would match "W46]" or "-46]". However, if
+the "]" is escaped with a backslash it is interpreted as the end of range,
+so [W-\]46] is interpreted as a class containing a range followed by two
+other characters. The octal or hexadecimal representation of "]" can also
+be used to end a range. <p>
+Ranges operate in the collating sequence of character
+values. They can also be used for characters specified numerically, for
+example [\000-\037]. In UTF-8 mode, ranges can include characters whose values
+are greater than 255, for example [\x{100}-\x{2ff}]. <p>
+If a range that includes
+letters is used when caseless matching is set, it matches the letters in
+either case. For example, [W-c] is equivalent to [][\\^_`wxyzabc], matched caselessly,
+and in non-UTF-8 mode, if character tables for the "fr_FR" locale are in
+use, [\xc8-\xcb] matches accented E characters in both cases. In UTF-8 mode,
+PCRE supports the concept of case for characters with values greater than
+128 only when it is compiled with Unicode property support. <p>
+The character
+types \d, \D, \p, \P, \s, \S, \w, and \W may also appear in a character class,
+and add the characters that they match to the class. For example, [\dABCDEF]
+matches any hexadecimal digit. A circumflex can conveniently be used with
+the upper case character types to specify a more restricted set of characters
+than the matching lower case type. For example, the class [^\W_] matches any
+letter or digit, but not underscore. <p>
+The only metacharacters that are recognized
+in character classes are backslash, hyphen (only where it can be interpreted
+as specifying a range), circumflex (only at the start), opening square
+bracket (only when it can be interpreted as introducing a POSIX class name
+- see the next section), and the terminating closing square bracket. However,
+escaping other non-alphanumeric characters does no harm.
+<h2><a name='sect11' href='#toc11'>Posix Character
+Classes</a></h2>
+ <p>
+Perl supports the POSIX notation for character classes. This uses
+names enclosed by [: and :] within the enclosing square brackets. PCRE also
+supports this notation. For example, <p>
+ [01[:alpha:]%]<br>
+ <p>
+matches "0", "1", any alphabetic character, or "%". The supported class
+names are <p>
+ alnum letters and digits<br>
+ alpha letters<br>
+ ascii character codes 0 - 127<br>
+ blank space or tab only<br>
+ cntrl control characters<br>
+ digit decimal digits (same as \d)<br>
+ graph printing characters, excluding space<br>
+ lower lower case letters<br>
+ print printing characters, including space<br>
+ punct printing characters, excluding letters and digits<br>
+ space white space (not quite the same as \s)<br>
+ upper upper case letters<br>
+ word "word" characters (same as \w)<br>
+ xdigit hexadecimal digits<br>
+ <p>
+The "space" characters are HT (9),
+LF (10), VT (11), FF (12), CR (13),
+and space (32). Notice that this list includes the VT character (code 11).
+This makes "space" different to \s, which does not include VT (for Perl
+compatibility). <p>
+The name "word" is a Perl extension, and "blank" is a GNU
+extension from Perl 5.8. Another Perl extension is negation, which is indicated
+by a ^ character after the colon. For example, <p>
+ [12[:^digit:]]<br>
+ <p>
+matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
+syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are
+not supported, and an error is given if they are encountered. <p>
+In UTF-8 mode,
+characters with values greater than 128 do not match any of the POSIX character
+classes.
+<h2><a name='sect12' href='#toc12'>Vertical Bar</a></h2>
+ <p>
+Vertical bar characters are used to separate alternative
+patterns. For example, the pattern <p>
+ gilbert|sullivan<br>
+ <p>
+matches either "gilbert" or "sullivan". Any number of alternatives may
+appear, and an empty alternative is permitted (matching the empty string).
+The matching process tries each alternative in turn, from left to right,
+and the first one that succeeds is used. If the alternatives are within
+a subpattern (defined below), "succeeds" means matching the rest of
+the main pattern as well as the alternative in the subpattern.
+<h2><a name='sect13' href='#toc13'>Internal
+Option Setting</a></h2>
+ <p>
+The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL,
+and PCRE_EXTENDED options can be changed from within the pattern by a sequence
+of Perl option letters enclosed between "(?" and ")". The option letters
+are <p>
+ i for PCRE_CASELESS<br>
+ m for PCRE_MULTILINE<br>
+ s for PCRE_DOTALL<br>
+ x for PCRE_EXTENDED<br>
+ <p>
+For example, (?im) sets caseless, multiline matching. It is also possible
+to unset these options by preceding the letter with a hyphen, and a combined
+setting and unsetting such as (?im-sx), which sets PCRE_CASELESS and PCRE_MULTILINE
+while unsetting PCRE_DOTALL and PCRE_EXTENDED, is also permitted. If a letter
+appears both before and after the hyphen, the option is unset. <p>
+When an option
+change occurs at top level (that is, not inside subpattern parentheses),
+the change applies to the remainder of the pattern that follows. If the
+change is placed right at the start of a pattern, PCRE extracts it into
+the global options (and it will therefore show up in data extracted by
+the <b>pcre_fullinfo()</b> function). <p>
+An option change within a subpattern affects
+only that part of the current pattern that follows it, so <p>
+ (a(?i)b)c<br>
+ <p>
+matches abc and aBc and no other strings (assuming PCRE_CASELESS is not
+used). By this means, options can be made to have different settings in
+different parts of the pattern. Any changes made in one alternative do carry
+on into subsequent branches within the same subpattern. For example, <p>
+ (a(?i)b|c)<br>
+ <p>
+matches "ab", "aB", "c", and "C", even though when matching "C" the first
+branch is abandoned before the option setting. This is because the effects
+of option settings happen at compile time. There would be some very weird
+behaviour otherwise. <p>
+The PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA
+can be changed in the same way as the Perl-compatible options by using the
+characters U and X respectively. The (?X) flag setting is special in that
+it must always occur earlier in the pattern than any of the additional
+features it turns on, even when it is at top level. It is best to put it
+at the start.
+<h2><a name='sect14' href='#toc14'>Subpatterns</a></h2>
+ <p>
+Subpatterns are delimited by parentheses (round
+brackets), which can be nested. Turning part of a pattern into a subpattern
+does two things: <p>
+1. It localizes a set of alternatives. For example, the
+pattern <p>
+ cat(aract|erpillar|)<br>
+ <p>
+matches one of the words "cat", "cataract", or "caterpillar". Without the
+parentheses, it would match "cataract", "erpillar" or the empty string.
+<p>
+2. It sets up the subpattern as a capturing subpattern. This means that,
+when the whole pattern matches, that portion of the subject string that
+matched the subpattern is passed back to the caller via the <i>ovector</i> argument
+of <b>pcre_exec()</b>. Opening parentheses are counted from left to right (starting
+from 1) to obtain numbers for the capturing subpatterns. <p>
+For example, if
+the string "the red king" is matched against the pattern <p>
+ the ((red|white)
+(king|queen))<br>
+ <p>
+the captured substrings are "red king", "red", and "king", and are numbered
+1, 2, and 3, respectively. <p>
+The fact that plain parentheses fulfil two functions
+is not always helpful. There are often times when a grouping subpattern
+is required without a capturing requirement. If an opening parenthesis is
+followed by a question mark and a colon, the subpattern does not do any
+capturing, and is not counted when computing the number of any subsequent
+capturing subpatterns. For example, if the string "the white queen" is matched
+against the pattern <p>
+ the ((?:red|white) (king|queen))<br>
+ <p>
+the captured substrings are "white queen" and "queen", and are numbered
+1 and 2. The maximum number of capturing subpatterns is 65535, and the maximum
+depth of nesting of all subpatterns, both capturing and non-capturing, is
+200. <p>
+As a convenient shorthand, if any option settings are required at the
+start of a non-capturing subpattern, the option letters may appear between
+the "?" and the ":". Thus the two patterns <p>
+ (?i:saturday|sunday)<br>
+ (?:(?i)saturday|sunday)<br>
+ <p>
+match exactly the same set of strings. Because alternative branches are
+tried from left to right, and options are not reset until the end of the
+subpattern is reached, an option setting in one branch does affect subsequent
+branches, so the above patterns match "SUNDAY" as well as "Saturday".
+
+<h2><a name='sect15' href='#toc15'>Named Subpatterns</a></h2>
+ <p>
+Identifying capturing parentheses by number is simple,
+but it can be very hard to keep track of the numbers in complicated regular
+expressions. Furthermore, if an expression is modified, the numbers may
+change. To help with this difficulty, PCRE supports the naming of subpatterns,
+something that Perl does not provide. The Python syntax (?P&lt;name&gt;...) is used.
+Names consist of alphanumeric characters and underscores, and must be unique
+within a pattern. <p>
+Named capturing parentheses are still allocated numbers
+as well as names. The PCRE API provides function calls for extracting the
+name-to-number translation table from a compiled pattern. There is also a
+convenience function for extracting a captured substring by name. For further
+details see the <b>pcreapi</b> documentation.
+<h2><a name='sect16' href='#toc16'>Repetition</a></h2>
+ <p>
+Repetition is specified
+by quantifiers, which can follow any of the following items: <p>
+ a literal
+data character<br>
+ the . metacharacter<br>
+ the \C escape sequence<br>
+ the \X escape sequence (in UTF-8 mode with Unicode properties)<br>
+ an escape such as \d that matches a single character<br>
+ a character class<br>
+ a back reference (see next section)<br>
+ a parenthesized subpattern (unless it is an assertion)<br>
+ <p>
+The general repetition quantifier specifies a minimum and maximum number
+of permitted matches, by giving the two numbers in curly brackets (braces),
+separated by a comma. The numbers must be less than 65536, and the first
+must be less than or equal to the second. For example: <p>
+ z{2,4}<br>
+ <p>
+matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special
+character. If the second number is omitted, but the comma is present, there
+is no upper limit; if the second number and the comma are both omitted,
+the quantifier specifies an exact number of required matches. Thus <p>
+ [aeiou]{3,}<br>
+ <p>
+matches at least 3 successive vowels, but may match many more, while <p>
+
+ \d{8}<br>
+ <p>
+matches exactly 8 digits. An opening curly bracket that appears in a position
+where a quantifier is not allowed, or one that does not match the syntax
+of a quantifier, is taken as a literal character. For example, {,6} is not
+a quantifier, but a literal string of four characters. <p>
+In UTF-8 mode, quantifiers
+apply to UTF-8 characters rather than to individual bytes. Thus, for example,
+\x{100}{2} matches two UTF-8 characters, each of which is represented by
+a two-byte sequence. Similarly, when Unicode property support is available,
+\X{3} matches three Unicode extended sequences, each of which may be several
+bytes long (and they may be of different lengths). <p>
+The quantifier {0} is
+permitted, causing the expression to behave as if the previous item and
+the quantifier were not present. <p>
+For convenience (and historical compatibility)
+the three most common quantifiers have single-character abbreviations: <p>
+
+ * is equivalent to {0,}<br>
+ + is equivalent to {1,}<br>
+ ? is equivalent to {0,1}<br>
+ <p>
+It is possible to construct infinite loops by following a subpattern that
+can match no characters with a quantifier that has no upper limit, for
+example: <p>
+ (a?)*<br>
+ <p>
+Earlier versions of Perl and PCRE used to give an error at compile time
+for such patterns. However, because there are cases where this can be useful,
+such patterns are now accepted, but if any repetition of the subpattern
+does in fact match no characters, the loop is forcibly broken. <p>
+By default,
+the quantifiers are "greedy", that is, they match as much as possible (up
+to the maximum number of permitted times), without causing the rest of
+the pattern to fail. The classic example of where this gives problems is
+in trying to match comments in C programs. These appear between /* and */
+and within the comment, individual * and / characters may appear. An attempt
+to match C comments by applying the pattern <p>
+ /\*.*\*/<br>
+ <p>
+to the string <p>
+ /* first comment */ not comment /* second comment */<br>
+ <p>
+fails, because it matches the entire string owing to the greediness of
+the .* item. <p>
+However, if a quantifier is followed by a question mark, it
+ceases to be greedy, and instead matches the minimum number of times possible,
+so the pattern <p>
+ /\*.*?\*/<br>
+ <p>
+does the right thing with the C comments. The meaning of the various quantifiers
+is not otherwise changed, just the preferred number of matches. Do not confuse
+this use of question mark with its use as a quantifier in its own right.
+Because it has two uses, it can sometimes appear doubled, as in <p>
+ \d??\d<br>
+ <p>
+which matches one digit by preference, but can match two if that is the
+only way the rest of the pattern matches. <p>
+If the PCRE_UNGREEDY option is
+set (an option which is not available in Perl), the quantifiers are not
+greedy by default, but individual ones can be made greedy by following
+them with a question mark. In other words, it inverts the default behaviour.
+<p>
+When a parenthesized subpattern is quantified with a minimum repeat count
+that is greater than 1 or with a limited maximum, more memory is required
+for the compiled pattern, in proportion to the size of the minimum or maximum.
+<p>
+If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equivalent
+to Perl&rsquo;s /s) is set, thus allowing the . to match newlines, the pattern
+is implicitly anchored, because whatever follows will be tried against
+every character position in the subject string, so there is no point in
+retrying the overall match at any position after the first. PCRE normally
+treats such a pattern as though it were preceded by \A. <p>
+In cases where it
+is known that the subject string contains no newlines, it is worth setting
+PCRE_DOTALL in order to obtain this optimization, or alternatively using
+^ to indicate anchoring explicitly. <p>
+However, there is one situation where
+the optimization cannot be used. When .* is inside capturing parentheses
+that are the subject of a backreference elsewhere in the pattern, a match
+at the start may fail, and a later one succeed. Consider, for example: <p>
+
+ (.*)abc\1<br>
+ <p>
+If the subject is "xyz123abc123" the match point is the fourth character.
+For this reason, such a pattern is not implicitly anchored. <p>
+When a capturing
+subpattern is repeated, the value captured is the substring that matched
+the final iteration. For example, after <p>
+ (tweedle[dume]{3}\s*)+<br>
+ <p>
+has matched "tweedledum tweedledee" the value of the captured substring
+is "tweedledee". However, if there are nested capturing subpatterns, the
+corresponding captured values may have been set in previous iterations.
+For example, after <p>
+ /(a|(b))+/<br>
+ <p>
+matches "aba" the value of the second captured substring is "b".
+<h2><a name='sect17' href='#toc17'>Atomic
+Grouping and Possessive Quantifiers</a></h2>
+ <p>
+With both maximizing and minimizing
+repetition, failure of what follows normally causes the repeated item to
+be re-evaluated to see if a different number of repeats allows the rest
+of the pattern to match. Sometimes it is useful to prevent this, either
+to change the nature of the match, or to cause it to fail earlier than it
+otherwise might, when the author of the pattern knows there is no point
+in carrying on. <p>
+Consider, for example, the pattern \d+foo when applied to
+the subject line <p>
+ 123456bar<br>
+ <p>
+After matching all 6 digits and then failing to match "foo", the normal
+action of the matcher is to try again with only 5 digits matching the \d+
+item, and then with 4, and so on, before ultimately failing. "Atomic grouping"
+(a term taken from Jeffrey Friedl&rsquo;s book) provides the means for specifying
+that once a subpattern has matched, it is not to be re-evaluated in this
+way. <p>
+If we use atomic grouping for the previous example, the matcher would
+give up immediately on failing to match "foo" the first time. The notation
+is a kind of special parenthesis, starting with (?&gt; as in this example:
+<p>
+ (?&gt;\d+)foo<br>
+ <p>
+This kind of parenthesis "locks up" the part of the pattern it contains
+once it has matched, and a failure further into the pattern is prevented
+from backtracking into it. Backtracking past it to previous items, however,
+works as normal. <p>
+An alternative description is that a subpattern of this
+type matches the string of characters that an identical standalone pattern
+would match, if anchored at the current point in the subject string. <p>
+Atomic
+grouping subpatterns are not capturing subpatterns. Simple cases such as
+the above example can be thought of as a maximizing repeat that must swallow
+everything it can. So, while both \d+ and \d+? are prepared to adjust the
+number of digits they match in order to make the rest of the pattern match,
+(?&gt;\d+) can only match an entire sequence of digits. <p>
+Atomic groups in general
+can of course contain arbitrarily complicated subpatterns, and can be nested.
+However, when the subpattern for an atomic group is just a single repeated
+item, as in the example above, a simpler notation, called a "possessive
+quantifier" can be used. This consists of an additional + character following
+a quantifier. Using this notation, the previous example can be rewritten
+as <p>
+ \d++foo<br>
+ <p>
+Possessive quantifiers are always greedy; the setting of the PCRE_UNGREEDY
+option is ignored. They are a convenient notation for the simpler forms
+of atomic group. However, there is no difference in the meaning or processing
+of a possessive quantifier and the equivalent atomic group. <p>
+The possessive
+quantifier syntax is an extension to the Perl syntax. It originates in Sun&rsquo;s
+Java package. <p>
+When a pattern contains an unlimited repeat inside a subpattern
+that can itself be repeated an unlimited number of times, the use of an
+atomic group is the only way to avoid some failing matches taking a very
+long time indeed. The pattern <p>
+ (\D+|&lt;\d+&gt;)*[!?]<br>
+ <p>
+matches an unlimited number of substrings that either consist of non-digits,
+or digits enclosed in &lt;&gt;, followed by either ! or ?. When it matches, it runs
+quickly. However, if it is applied to <p>
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa<br>
+ <p>
+it takes a long time before reporting failure. This is because the string
+can be divided between the internal \D+ repeat and the external * repeat
+in a large number of ways, and all have to be tried. (The example uses [!?]
+rather than a single character at the end, because both PCRE and Perl have
+an optimization that allows for fast failure when a single character is
+used. They remember the last single character that is required for a match,
+and fail early if it is not present in the string.) If the pattern is changed
+so that it uses an atomic group, like this: <p>
+ ((?&gt;\D+)|&lt;\d+&gt;)*[!?]<br>
+ <p>
+sequences of non-digits cannot be broken, and failure happens quickly.
+
+<h2><a name='sect18' href='#toc18'>Back References</a></h2>
+ <p>
+Outside a character class, a backslash followed by a
+digit greater than 0 (and possibly further digits) is a back reference
+to a capturing subpattern earlier (that is, to its left) in the pattern,
+provided there have been that many previous capturing left parentheses.
+<p>
+However, if the decimal number following the backslash is less than 10,
+it is always taken as a back reference, and causes an error only if there
+are not that many capturing left parentheses in the entire pattern. In other
+words, the parentheses that are referenced need not be to the left of the
+reference for numbers less than 10. See the subsection entitled "Non-printing
+characters" above for further details of the handling of digits following
+a backslash. <p>
+A back reference matches whatever actually matched the capturing
+subpattern in the current subject string, rather than anything matching
+the subpattern itself (see "Subpatterns as subroutines" below for a
+way of doing that). So the pattern <p>
+ (sens|respons)e and \1ibility<br>
+ <p>
+matches "sense and sensibility" and "response and responsibility", but
+not "sense and responsibility". If caseful matching is in force at the time
+of the back reference, the case of letters is relevant. For example, <p>
+ ((?i)rah)\s+\1<br>
+ <p>
+matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
+capturing subpattern is matched caselessly. <p>
+Back references to named subpatterns
+use the Python syntax (?P=name). We could rewrite the above example as follows:
+<p>
+ (?P&lt;p1&gt;(?i)rah)\s+(?P=p1)<br>
+ <p>
+There may be more than one back reference to the same subpattern. If a
+subpattern has not actually been used in a particular match, any back references
+to it always fail. For example, the pattern <p>
+ (a|(bc))\2<br>
+ <p>
+always fails if it starts to match "a" rather than "bc". Because there
+may be many capturing parentheses in a pattern, all digits following the
+backslash are taken as part of a potential back reference number. If the
+pattern continues with a digit character, some delimiter must be used to
+terminate the back reference. If the PCRE_EXTENDED option is set, this can
+be whitespace. Otherwise an empty comment (see "Comments" below) can
+be used. <p>
+A back reference that occurs inside the parentheses to which it
+refers fails when the subpattern is first used, so, for example, (a\1) never
+matches. However, such references can be useful inside repeated subpatterns.
+For example, the pattern <p>
+ (a|b\1)+<br>
+ <p>
+matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration
+of the subpattern, the back reference matches the character string corresponding
+to the previous iteration. In order for this to work, the pattern must be
+such that the first iteration does not need to match the back reference.
+This can be done using alternation, as in the example above, or by a quantifier
+with a minimum of zero.
+<h2><a name='sect19' href='#toc19'>Assertions</a></h2>
+ <p>
+An assertion is a test on the characters
+following or preceding the current matching point that does not actually
+consume any characters. The simple assertions coded as \b, \B, \A, \G, \Z, \z,
+^ and $ are described above. <p>
+More complicated assertions are coded as
+subpatterns. There are two kinds: those that look ahead of the current position
+in the subject string, and those that look behind it. An assertion subpattern
+is matched in the normal way, except that it does not cause the current
+matching position to be changed. <p>
+Assertion subpatterns are not capturing
+subpatterns, and may not be repeated, because it makes no sense to assert
+the same thing several times. If any kind of assertion contains capturing
+subpatterns within it, these are counted for the purposes of numbering
+the capturing subpatterns in the whole pattern. However, substring capturing
+is carried out only for positive assertions, because it does not make sense
+for negative assertions.
+<h3><a name='sect20' href='#toc20'>Lookahead assertions</a></h3>
+ <p>
+Lookahead assertions start
+with (?= for positive assertions and (?! for negative assertions. For example,
+<p>
+ \w+(?=;)<br>
+ <p>
+matches a word followed by a semicolon, but does not include the semicolon
+in the match, and <p>
+ foo(?!bar)<br>
+ <p>
+matches any occurrence of "foo" that is not followed by "bar". Note that
+the apparently similar pattern <p>
+ (?!foo)bar<br>
+ <p>
+does not find an occurrence of "bar" that is preceded by something other
+than "foo"; it finds any occurrence of "bar" whatsoever, because the assertion
+(?!foo) is always true when the next three characters are "bar". A lookbehind
+assertion is needed to achieve the other effect. <p>
+If you want to force a
+matching failure at some point in a pattern, the most convenient way to
+do it is with (?!) because an empty string always matches, so an assertion
+that requires there not to be an empty string must always fail.
+<h3><a name='sect21' href='#toc21'>Lookbehind
+assertions</a></h3>
+ <p>
+Lookbehind assertions start with (?&lt;= for positive assertions
+and (?&lt;! for negative assertions. For example, <p>
+ (?&lt;!foo)bar<br>
+ <p>
+does find an occurrence of "bar" that is not preceded by "foo". The contents
+of a lookbehind assertion are restricted such that all the strings it matches
+must have a fixed length. However, if there are several alternatives, they
+do not all have to have the same fixed length. Thus <p>
+ (?&lt;=bullock|donkey)<br>
+ <p>
+is permitted, but <p>
+ (?&lt;!dogs?|cats?)<br>
+ <p>
+causes an error at compile time. Branches that match different length strings
+are permitted only at the top level of a lookbehind assertion. This is an
+extension compared with Perl (at least for 5.8), which requires all branches
+to match the same length of string. An assertion such as <p>
+ (?&lt;=ab(c|de))<br>
+ <p>
+is not permitted, because its single top-level branch can match two different
+lengths, but it is acceptable if rewritten to use two top-level branches:
+<p>
+ (?&lt;=abc|abde)<br>
+ <p>
+The implementation of lookbehind assertions is, for each alternative,
+to temporarily move the current position back by the fixed width and then
+try to match. If there are insufficient characters before the current position,
+the match is deemed to fail. <p>
+PCRE does not allow the \C escape (which matches
+a single byte in UTF-8 mode) to appear in lookbehind assertions, because
+it makes it impossible to calculate the length of the lookbehind. The \X
+escape, which can match different numbers of bytes, is also not permitted.
+<p>
+Atomic groups can be used in conjunction with lookbehind assertions to
+specify efficient matching at the end of the subject string. Consider a
+simple pattern such as <p>
+ abcd$<br>
+ <p>
+when applied to a long string that does not match. Because matching proceeds
+from left to right, PCRE will look for each "a" in the subject and then
+see if what follows matches the rest of the pattern. If the pattern is specified
+as <p>
+ ^.*abcd$<br>
+ <p>
+the initial .* matches the entire string at first, but when this fails
+(because there is no following "a"), it backtracks to match all but the
+last character, then all but the last two characters, and so on. Once again
+the search for "a" covers the entire string, from right to left, so we
+are no better off. However, if the pattern is written as <p>
+ ^(?&gt;.*)(?&lt;=abcd)<br>
+ <p>
+or, equivalently, using the possessive quantifier syntax, <p>
+ ^.*+(?&lt;=abcd)<br>
+ <p>
+there can be no backtracking for the .* item; it can match only the entire
+string. The subsequent lookbehind assertion does a single test on the last
+four characters. If it fails, the match fails immediately. For long strings,
+this approach makes a significant difference to the processing time.
+<h3><a name='sect22' href='#toc22'>Using
+multiple assertions</a></h3>
+ <p>
+Several assertions (of any sort) may occur in succession.
+For example, <p>
+ (?&lt;=\d{3})(?&lt;!999)foo<br>
+ <p>
+matches "foo" preceded by three digits that are not "999". Notice that
+each of the assertions is applied independently at the same point in the
+subject string. First there is a check that the previous three characters
+are all digits, and then there is a check that the same three characters
+are not "999". This pattern does <i>not</i> match "foo" preceded by six characters,
+the first three of which are digits and the last three of which are not "999".
+For example, it doesn&rsquo;t match "123abcfoo". A pattern to do that is <p>
+ (?&lt;=\d{3}...)(?&lt;!999)foo<br>
+ <p>
+This time the first assertion looks at the preceding six characters, checking
+that the first three are digits, and then the second assertion checks that
+the preceding three characters are not "999". <p>
+Assertions can be nested in
+any combination. For example, <p>
+ (?&lt;=(?&lt;!foo)bar)baz<br>
+ <p>
+matches an occurrence of "baz" that is preceded by "bar" which in turn
+is not preceded by "foo", while <p>
+ (?&lt;=\d{3}(?!999)...)foo<br>
+ <p>
+is another pattern that matches "foo" preceded by three digits and any
+three characters that are not "999".
+<h2><a name='sect23' href='#toc23'>Conditional Subpatterns</a></h2>
+ <p>
+It is possible
+to cause the matching process to obey a subpattern conditionally or to
+choose between two alternative subpatterns, depending on the result of
+an assertion, or whether a previous capturing subpattern matched or not.
+The two possible forms of conditional subpattern are <p>
+ (?(condition)yes-pattern)<br>
+ (?(condition)yes-pattern|no-pattern)<br>
+ <p>
+If the condition is satisfied, the yes-pattern is used; otherwise the no-pattern
+(if present) is used. If there are more than two alternatives in the subpattern,
+a compile-time error occurs. <p>
+There are three kinds of condition. If the text
+between the parentheses consists of a sequence of digits, the condition
+is satisfied if the capturing subpattern of that number has previously
+matched. The number must be greater than zero. Consider the following pattern,
+which contains non-significant white space to make it more readable (assume
+the PCRE_EXTENDED option) and to divide it into three parts for ease of
+discussion: <p>
+ ( \( )? [^()]+ (?(1) \) )<br>
+ <p>
+The first part matches an optional opening parenthesis, and if that character
+is present, sets it as the first captured substring. The second part matches
+one or more characters that are not parentheses. The third part is a conditional
+subpattern that tests whether the first set of parentheses matched or not.
+If they did, that is, if the subject started with an opening parenthesis, the
+condition is true, and so the yes-pattern is executed and a closing parenthesis
+is required. Otherwise, since no-pattern is not present, the subpattern matches
+nothing. In other words, this pattern matches a sequence of non-parentheses,
+optionally enclosed in parentheses. <p>
+If the condition is the string (R),
+it is satisfied if a recursive call to the pattern or subpattern has been
+made. At "top level", the condition is false. This is a PCRE extension. Recursive
+patterns are described in the next section. <p>
+If the condition is not a sequence
+of digits or (R), it must be an assertion. This may be a positive or negative
+lookahead or lookbehind assertion. Consider this pattern, again containing
+non-significant white space, and with the two alternatives on the second
+line: <p>
+ (?(?=[^a-z]*[a-z])<br>
+ \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} )<br>
+ <p>
+The condition is a positive lookahead assertion that matches an optional
+sequence of non-letters followed by a letter. In other words, it tests for
+the presence of at least one letter in the subject. If a letter is found,
+the subject is matched against the first alternative; otherwise it is matched
+against the second. This pattern matches strings in one of the two forms
+dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
+<h2><a name='sect24' href='#toc24'>Comments</a></h2>
+
+<p>
+The sequence (?# marks the start of a comment that continues up to the
+next closing parenthesis. Nested parentheses are not permitted. The characters
+that make up a comment play no part in the pattern matching at all. <p>
+If the
+PCRE_EXTENDED option is set, an unescaped # character outside a character
+class introduces a comment that continues up to the next newline character
+in the pattern.
+<h2><a name='sect25' href='#toc25'>Recursive Patterns</a></h2>
+ <p>
+Consider the problem of matching a
+string in parentheses, allowing for unlimited nested parentheses. Without
+the use of recursion, the best that can be done is to use a pattern that
+matches up to some fixed depth of nesting. It is not possible to handle
+an arbitrary nesting depth. Perl provides a facility that allows regular
+expressions to recurse (amongst other things). It does this by interpolating
+Perl code in the expression at run time, and the code can refer to the
+expression itself. A Perl pattern to solve the parentheses problem can be
+created like this: <p>
+ $re = qr{\( (?: (?&gt;[^()]+) | (?p{$re}) )* \)}x;<br>
+ <p>
+The (?p{...}) item interpolates Perl code at run time, and in this case refers
+recursively to the pattern in which it appears. Obviously, PCRE cannot support
+the interpolation of Perl code. Instead, it supports some special syntax
+for recursion of the entire pattern, and also for individual subpattern
+recursion. <p>
+The special item that consists of (? followed by a number greater
+than zero and a closing parenthesis is a recursive call of the subpattern
+of the given number, provided that it occurs inside that subpattern. (If
+not, it is a "subroutine" call, which is described in the next section.)
+The special item (?R) is a recursive call of the entire regular expression.
+<p>
+For example, this PCRE pattern solves the nested parentheses problem (assume
+the PCRE_EXTENDED option is set so that white space is ignored): <p>
+ \( (
+(?&gt;[^()]+) | (?R) )* \)<br>
+ <p>
+First it matches an opening parenthesis. Then it matches any number of
+substrings which can either be a sequence of non-parentheses, or a recursive
+match of the pattern itself (that is a correctly parenthesized substring).
+Finally there is a closing parenthesis. <p>
+If this were part of a larger pattern,
+you would not want to recurse the entire pattern, so instead you could
+use this: <p>
+ ( \( ( (?&gt;[^()]+) | (?1) )* \) )<br>
+ <p>
+We have put the pattern into parentheses, and caused the recursion to
+refer to them instead of the whole pattern. In a larger pattern, keeping
+track of parenthesis numbers can be tricky. It may be more convenient to
+use named parentheses instead. For this, PCRE uses (?P&gt;name), which is an
+extension to the Python syntax that PCRE uses for named parentheses (Perl
+does not provide named parentheses). We could rewrite the above example
+as follows: <p>
+ (?P&lt;pn&gt; \( ( (?&gt;[^()]+) | (?P&gt;pn) )* \) )<br>
+ <p>
+This particular example pattern contains nested unlimited repeats, and
+so the use of atomic grouping for matching strings of non-parentheses is
+important when applying the pattern to strings that do not match. For example,
+when this pattern is applied to <p>
+ (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()<br>
+ <p>
+it yields "no match" quickly. However, if atomic grouping is not used,
+the match runs for a very long time indeed because there are so many different
+ways the + and * repeats can carve up the subject, and all have to be tested
+before failure can be reported. <p>
+At the end of a match, the values set for
+any capturing subpatterns are those from the outermost level of the recursion
+at which the subpattern value is set. If you want to obtain intermediate
+values, a callout function can be used (see the "Callouts" section below and the <b>pcrecallout</b>
+ documentation). If the pattern above is matched against <p>
+ (ab(cd)ef)<br>
+ <p>
+the value for the capturing parentheses is "ef", which is the last value
+taken on at the top level. If additional parentheses are added, giving <p>
+
+ \( ( ( (?&gt;[^()]+) | (?R) )* ) \)<br>
+ ^ ^<br>
+ ^ ^<br>
+ <p>
+the string they capture is "ab(cd)ef", the contents of the top level parentheses.
+If there are more than 15 capturing parentheses in a pattern, PCRE has
+to obtain extra memory to store data during a recursion, which it does
+by using <b>pcre_malloc</b>, freeing it via <b>pcre_free</b> afterwards. If no memory
+can be obtained, the match fails with the PCRE_ERROR_NOMEMORY error. <p>
+Do
+not confuse the (?R) item with the condition (R), which tests for recursion.
+Consider this pattern, which matches text in angle brackets, allowing for
+arbitrary nesting. Only digits are allowed in nested brackets (that is,
+when recursing), whereas any characters are permitted at the outer level.
+<p>
+ &lt; (?: (?(R) \d++ | [^&lt;&gt;]*+) | (?R)) * &gt;<br>
+ <p>
+In this pattern, (?(R) is the start of a conditional subpattern, with
+two different alternatives for the recursive and non-recursive cases. The
+(?R) item is the actual recursive call.
+<h2><a name='sect26' href='#toc26'>Subpatterns As Subroutines</a></h2>
+ <p>
+If
+the syntax for a recursive subpattern reference (either by number or by
+name) is used outside the parentheses to which it refers, it operates like
+a subroutine in a programming language. An earlier example pointed out that
+the pattern <p>
+ (sens|respons)e and \1ibility<br>
+ <p>
+matches "sense and sensibility" and "response and responsibility", but
+not "sense and responsibility". If instead the pattern <p>
+ (sens|respons)e
+and (?1)ibility<br>
+ <p>
+is used, it does match "sense and responsibility" as well as the other
+two strings. Such references must, however, follow the subpattern to which
+they refer.
+<h2><a name='sect27' href='#toc27'>Callouts</a></h2>
+ <p>
+Perl has a feature whereby using the sequence (?{...})
+causes arbitrary Perl code to be obeyed in the middle of matching a regular
+expression. This makes it possible, amongst other things, to extract different
+substrings that match the same pair of parentheses when there is a repetition.
+<p>
+PCRE provides a similar feature, but of course it cannot obey arbitrary
+Perl code. The feature is called "callout". The caller of PCRE provides an
+external function by putting its entry point in the global variable <i>pcre_callout</i>.
+By default, this variable contains NULL, which disables all calling out.
+<p>
+Within a regular expression, (?C) indicates the points at which the external
+function is to be called. If you want to identify different callout points,
+you can put a number less than 256 after the letter C. The default value
+is zero. For example, this pattern has two callout points: <p>
+ (?C1)dabc(?C2)def<br>
+ <p>
+If the PCRE_AUTO_CALLOUT flag is passed to <b>pcre_compile()</b>, callouts are
+automatically installed before each item in the pattern. They are all numbered
+255. <p>
+During matching, when PCRE reaches a callout point (and <i>pcre_callout</i>
+is set), the external function is called. It is provided with the number
+of the callout, the position in the pattern, and, optionally, one item
+of data originally supplied by the caller of <b>pcre_exec()</b>. The callout function
+may cause matching to proceed, to backtrack, or to fail altogether. A complete
+description of the interface to the callout function is given in the <b>pcrecallout</b>
+ documentation. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Regular Expression Details</a></li>
+<li><a name='toc2' href='#sect2'>Backslash</a></li>
+<ul>
+<li><a name='toc3' href='#sect3'>Non-printing characters</a></li>
+<li><a name='toc4' href='#sect4'>Generic character types</a></li>
+<li><a name='toc5' href='#sect5'>Unicode character properties</a></li>
+<li><a name='toc6' href='#sect6'>Simple assertions</a></li>
+</ul>
+<li><a name='toc7' href='#sect7'>Circumflex and Dollar</a></li>
+<li><a name='toc8' href='#sect8'>Full Stop (period, Dot)</a></li>
+<li><a name='toc9' href='#sect9'>Matching a Single Byte</a></li>
+<li><a name='toc10' href='#sect10'>Square Brackets and Character Classes</a></li>
+<li><a name='toc11' href='#sect11'>Posix Character Classes</a></li>
+<li><a name='toc12' href='#sect12'>Vertical Bar</a></li>
+<li><a name='toc13' href='#sect13'>Internal Option Setting</a></li>
+<li><a name='toc14' href='#sect14'>Subpatterns</a></li>
+<li><a name='toc15' href='#sect15'>Named Subpatterns</a></li>
+<li><a name='toc16' href='#sect16'>Repetition</a></li>
+<li><a name='toc17' href='#sect17'>Atomic Grouping and Possessive Quantifiers</a></li>
+<li><a name='toc18' href='#sect18'>Back References</a></li>
+<li><a name='toc19' href='#sect19'>Assertions</a></li>
+<ul>
+<li><a name='toc20' href='#sect20'>Lookahead assertions</a></li>
+<li><a name='toc21' href='#sect21'>Lookbehind assertions</a></li>
+<li><a name='toc22' href='#sect22'>Using multiple assertions</a></li>
+</ul>
+<li><a name='toc23' href='#sect23'>Conditional Subpatterns</a></li>
+<li><a name='toc24' href='#sect24'>Comments</a></li>
+<li><a name='toc25' href='#sect25'>Recursive Patterns</a></li>
+<li><a name='toc26' href='#sect26'>Subpatterns As Subroutines</a></li>
+<li><a name='toc27' href='#sect27'>Callouts</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcreperform.3.html b/spamfilter/Utilities/PCRE/man/html/pcreperform.3.html
new file mode 100644
index 0000000..a4fea50
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcreperform.3.html
@@ -0,0 +1,86 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Performance</a></h2>
+ <p>
+Certain items
+that may appear in regular expression patterns are more efficient than
+others. It is more efficient to use a character class like [aeiou] than
+a set of alternatives such as (a|e|i|o|u). In general, the simplest construction
+that provides the required behaviour is usually the most efficient. Jeffrey
+Friedl&rsquo;s book contains a lot of useful general discussion about optimizing
+regular expressions for efficient performance. This document contains a
+few observations about PCRE. <p>
+Using Unicode character properties (the \p,
+\P, and \X escapes) is slow, because PCRE has to scan a structure that contains
+data for over fifteen thousand characters whenever it needs a character&rsquo;s
+property. If you can find an alternative pattern that does not use character
+properties, it will probably be faster. <p>
+When a pattern begins with .* not
+in parentheses, or in parentheses that are not the subject of a backreference,
+and the PCRE_DOTALL option is set, the pattern is implicitly anchored by
+PCRE, since it can match only at the start of a subject string. However,
+if PCRE_DOTALL is not set, PCRE cannot make this optimization, because
+the . metacharacter does not then match a newline, and if the subject string
+contains newlines, the pattern may match from the character immediately
+following one of them instead of from the very start. For example, the pattern
+<p>
+ .*second<br>
+ <p>
+matches the subject "first\nand second" (where \n stands for a newline character),
+with the match starting at the seventh character. In order to do this, PCRE
+has to retry the match starting after every newline in the subject. <p>
+If you
+are using such a pattern with subject strings that do not contain newlines,
+the best performance is obtained by setting PCRE_DOTALL, or starting the
+pattern with ^.* to indicate explicit anchoring. That saves PCRE from having
+to scan along the subject looking for a newline to restart at. <p>
+Beware of
+patterns that contain nested indefinite repeats. These can take a long time
+to run when applied to a string that does not match. Consider the pattern
+fragment <p>
+ (a+)*<br>
+ <p>
+This can match "aaaa" in 16 different ways, and this number increases
+very rapidly as the string gets longer. (The * repeat can match 0, 1, 2,
+3, or 4 times, and for each of those cases other than 0, the + repeats
+can match different numbers of times.) When the remainder of the pattern
+is such that the entire match is going to fail, PCRE has in principle to
+try every possible variation, and this can take an extremely long time.
+<p>
+An optimization catches some of the more simple cases such as <p>
+ (a+)*b<br>
+ <p>
+where a literal character follows. Before embarking on the standard matching
+procedure, PCRE checks that there is a "b" later in the subject string,
+and if there is not, it fails the match immediately. However, when there
+is no following literal this optimization cannot be used. You can see the
+difference by comparing the behaviour of <p>
+ (a+)*\d<br>
+ <p>
+with the pattern above. The former gives a failure almost instantly when
+applied to a whole line of "a" characters, whereas the latter takes an
+appreciable time with strings longer than about 20 characters. <p>
+In many cases,
+the solution to this kind of performance issue is to use an atomic group
+or a possessive quantifier. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Performance</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcreposix.3.html b/spamfilter/Utilities/PCRE/man/html/pcreposix.3.html
new file mode 100644
index 0000000..0e7cafd
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcreposix.3.html
@@ -0,0 +1,187 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions.
+<h2><a name='sect1' href='#toc1'>Synopsis of Posix API</a></h2>
+ <p>
+<b>#include
+&lt;pcreposix.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b> <b>int <i>cflags</i>);</b> <p>
+<br>
+<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b> <b>size_t <i>nmatch</i>, regmatch_t
+<i>pmatch</i>[], int <i>eflags</i>);</b> <p>
+<br>
+<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b> <b>char *<i>errbuf</i>, size_t
+<i>errbuf_size</i>);</b> <p>
+<br>
+<b>void regfree(regex_t *<i>preg</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This set of functions provides
+a POSIX-style API to the PCRE regular expression package. See the <b>pcreapi</b>
+ documentation for a description of PCRE&rsquo;s native API, which contains additional
+functionality. <p>
+The functions described here are just wrapper functions that
+ultimately call the PCRE native API. Their prototypes are defined in the
+<b>pcreposix.h</b> header file, and on Unix systems the library itself is called
+<b>pcreposix.a</b>, so can be accessed by adding <b>-lpcreposix</b> to the command for
+linking an application that uses them. Because the POSIX functions call
+the native ones, it is also necessary to add <b>-lpcre</b>. <p>
+I have implemented only
+those option bits that can be reasonably mapped to PCRE native options.
+In addition, the options REG_EXTENDED and REG_NOSUB are defined with the
+value zero. They have no effect, but since programs that are written to
+the POSIX interface often use them, this makes it easier to slot in PCRE
+as a replacement library. Other POSIX options are not even defined. <p>
+When
+PCRE is called via these functions, it is only the API that is POSIX-like
+in style. The syntax and semantics of the regular expressions themselves
+are still those of Perl, subject to the setting of various PCRE options,
+as described below. "POSIX-like in style" means that the API approximates
+to the POSIX definition; it is not fully POSIX-compatible, and in multi-byte
+encoding domains it is probably even less compatible. <p>
+The header for these
+functions is supplied as <b>pcreposix.h</b> to avoid any potential clash with other
+POSIX libraries. It can, of course, be renamed or aliased as <b>regex.h</b>, which
+is the "correct" name. It provides two structure types, <i>regex_t</i> for compiled
+internal forms, and <i>regmatch_t</i> for returning captured substrings. It also
+defines some constants whose names start with "REG_"; these are used for
+setting options and identifying error codes. <p>
+
+<h2><a name='sect3' href='#toc3'>Compiling a Pattern</a></h2>
+ <p>
+The function
+<b>regcomp()</b> is called to compile a pattern into an internal form. The pattern
+is a C string terminated by a binary zero, and is passed in the argument
+<i>pattern</i>. The <i>preg</i> argument is a pointer to a <b>regex_t</b> structure that is used
+as a base for storing information about the compiled expression. <p>
+The argument
+<i>cflags</i> is either zero, or contains one or more of the bits defined by the
+following macros: <p>
+ REG_ICASE<br>
+ <p>
+The PCRE_CASELESS option is set when the expression is passed for compilation
+to the native function. <p>
+ REG_NEWLINE<br>
+ <p>
+The PCRE_MULTILINE option is set when the expression is passed for compilation
+to the native function. Note that this does <i>not</i> mimic the defined POSIX
+behaviour for REG_NEWLINE (see the following section). <p>
+In the absence of
+these flags, no options are passed to the native function. This means that
+the regex is compiled with PCRE default semantics. In particular, the way
+it handles newline characters in the subject string is the Perl way, not
+the POSIX way. Note that setting PCRE_MULTILINE has only <i>some</i> of the effects
+specified for REG_NEWLINE. It does not affect the way newlines are matched
+by . (they aren&rsquo;t) or by a negative class such as [^a] (they are). <p>
+The yield
+of <b>regcomp()</b> is zero on success, and non-zero otherwise. The <i>preg</i> structure
+is filled in on success, and one member of the structure is public: <i>re_nsub</i>
+contains the number of capturing subpatterns in the regular expression.
+Various error codes are defined in the header file.
+<h2><a name='sect4' href='#toc4'>Matching Newline Characters</a></h2>
+
+<p>
+This area is not simple, because POSIX and Perl take different views of
+things. It is not possible to get PCRE to obey POSIX semantics, but then
+PCRE was never intended to be a POSIX engine. The following table lists
+the different possibilities for matching newline characters in PCRE: <p>
+
+ Default Change with<br>
+ <p>
+ . matches newline no PCRE_DOTALL<br>
+ newline matches [^a] yes not changeable<br>
+ $ matches \n at end yes PCRE_DOLLAR_ENDONLY<br>
+ $ matches \n in middle no PCRE_MULTILINE<br>
+ ^ matches \n in middle no PCRE_MULTILINE<br>
+ <p>
+This is the equivalent table for POSIX: <p>
+ Default
+ Change with<br>
+ <p>
+ . matches newline yes REG_NEWLINE<br>
+ newline matches [^a] yes REG_NEWLINE<br>
+ $ matches \n at end no REG_NEWLINE<br>
+ $ matches \n in middle no REG_NEWLINE<br>
+ ^ matches \n in middle no REG_NEWLINE<br>
+ <p>
+PCRE&rsquo;s behaviour is the same as Perl&rsquo;s, except that there is no equivalent
+for PCRE_DOLLAR_ENDONLY in Perl. In both PCRE and Perl, there is no way
+to stop newline from matching [^a]. <p>
+The default POSIX newline handling can
+be obtained by setting PCRE_DOTALL and PCRE_DOLLAR_ENDONLY, but there is
+no way to make PCRE behave exactly as for the REG_NEWLINE action.
+<h2><a name='sect5' href='#toc5'>Matching
+a Pattern</a></h2>
+ <p>
+The function <b>regexec()</b> is called to match a compiled pattern
+<i>preg</i> against a given <i>string</i>, which is terminated by a zero byte, subject
+to the options in <i>eflags</i>. These can be: <p>
+ REG_NOTBOL<br>
+ <p>
+The PCRE_NOTBOL option is set when calling the underlying PCRE matching
+function. <p>
+ REG_NOTEOL<br>
+ <p>
+The PCRE_NOTEOL option is set when calling the underlying PCRE matching
+function. <p>
+The portion of the string that was matched, and also any captured
+substrings, are returned via the <i>pmatch</i> argument, which points to an array
+of <i>nmatch</i> structures of type <i>regmatch_t</i>, containing the members <i>rm_so</i> and
+<i>rm_eo</i>. These contain the offset to the first character of each substring
+and the offset to the first character after the end of each substring,
+respectively. The 0th element of the vector relates to the entire portion
+of <i>string</i> that was matched; subsequent elements relate to the capturing
+subpatterns of the regular expression. Unused entries in the array have
+both structure members set to -1. <p>
+A successful match yields a zero return;
+various error codes are defined in the header file, of which REG_NOMATCH
+is the "expected" failure code.
+<h2><a name='sect6' href='#toc6'>Error Messages</a></h2>
+ <p>
+The <b>regerror()</b> function
+maps a non-zero errorcode from either <b>regcomp()</b> or <b>regexec()</b> to a printable
+message. If <i>preg</i> is not NULL, the error should have arisen from the use
+of that structure. A message terminated by a binary zero is placed in <i>errbuf</i>.
+The length of the message, including the zero, is limited to <i>errbuf_size</i>.
+The yield of the function is the size of buffer needed to hold the whole
+message.
+<h2><a name='sect7' href='#toc7'>Memory Usage</a></h2>
+ <p>
+Compiling a regular expression causes memory to
+be allocated and associated with the <i>preg</i> structure. The function <b>regfree()</b>
+frees all such memory, after which <i>preg</i> may no longer be used as a compiled
+expression.
+<h2><a name='sect8' href='#toc8'>Author</a></h2>
+ <p>
+Philip Hazel &lt;ph10@cam.ac.uk&gt; <br>
+University Computing Service, <br>
+Cambridge CB2 3QG, England. <p>
+ Last updated: 07 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis of Posix API</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+<li><a name='toc3' href='#sect3'>Compiling a Pattern</a></li>
+<li><a name='toc4' href='#sect4'>Matching Newline Characters</a></li>
+<li><a name='toc5' href='#sect5'>Matching a Pattern</a></li>
+<li><a name='toc6' href='#sect6'>Error Messages</a></li>
+<li><a name='toc7' href='#sect7'>Memory Usage</a></li>
+<li><a name='toc8' href='#sect8'>Author</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcresample.3.html b/spamfilter/Utilities/PCRE/man/html/pcresample.3.html
new file mode 100644
index 0000000..6fad4a9
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcresample.3.html
@@ -0,0 +1,72 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Sample Program</a></h2>
+ <p>
+A simple,
+complete demonstration program, to get you started with using PCRE, is
+supplied in the file <i>pcredemo.c</i> in the PCRE distribution. <p>
+The program compiles
+the regular expression that is its first argument, and matches it against
+the subject string in its second argument. No PCRE options are set, and
+default character tables are used. If matching succeeds, the program outputs
+the portion of the subject that matched, together with the contents of
+any captured substrings. <p>
+If the -g option is given on the command line, the
+program then goes on to check for further matches of the same regular expression
+in the same subject string. The logic is a little bit tricky because of
+the possibility of matching an empty string. Comments in the code explain
+what is going on. <p>
+If PCRE is installed in the standard include and library
+directories for your system, you should be able to compile the demonstration
+program using this command: <p>
+ gcc -o pcredemo pcredemo.c -lpcre<br>
+ <p>
+If PCRE is installed elsewhere, you may need to add additional options
+to the command line. For example, on a Unix-like system that has PCRE installed
+in <i>/usr/local</i>, you can compile the demonstration program using a command
+like this: <p>
+ gcc -o pcredemo -I/usr/local/include pcredemo.c \<br>
+ -L/usr/local/lib -lpcre<br>
+ <p>
+Once you have compiled the demonstration program, you can run simple tests
+like this: <p>
+ ./pcredemo &rsquo;cat|dog&rsquo; &rsquo;the cat sat on the mat&rsquo;<br>
+ ./pcredemo -g &rsquo;cat|dog&rsquo; &rsquo;the dog sat on the cat&rsquo;<br>
+ <p>
+Note that there is a much more comprehensive test program, called <b>pcretest</b>,
+ which supports many more facilities for testing regular expressions and
+the PCRE library. The <b>pcredemo</b> program is provided as a simple coding example.
+<p>
+On some operating systems (e.g. Solaris), when PCRE is not installed in the
+standard library directory, you may get an error like this when you try
+to run <b>pcredemo</b>: <p>
+ ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such
+file or directory<br>
+ <p>
+This is caused by the way shared library support works on those systems.
+You need to add <p>
+ -R/usr/local/lib<br>
+ <p>
+(for example) to the compile command to get round this problem. <p>
+ Last updated:
+09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Sample Program</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcretest.1.html b/spamfilter/Utilities/PCRE/man/html/pcretest.1.html
new file mode 100644
index 0000000..e96293e
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcretest.1.html
@@ -0,0 +1,433 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRETEST(1) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+pcretest - a program for testing Perl-compatible regular expressions.
+
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>pcretest "[-C] [-d] [-i] [-m] [-o osize] [-p] [-t] [source]"</b> <b>[destination]</b>
+<p>
+<b>pcretest</b> was written as a test program for the PCRE regular expression
+library itself, but it can also be used for experimenting with regular
+expressions. This document describes the features of the test program; for
+details of the regular expressions themselves, see the <b>pcrepattern</b> documentation.
+For details of the PCRE library function calls and their options, see the
+ <b>pcreapi</b> documentation.
+<h2><a name='sect2' href='#toc2'>Options</a></h2>
+
+<dl>
+
+<dt><b>-C</b> </dt>
+<dd>Output the version number of the PCRE
+library, and all available information about the optional features that
+are included, and then exit. </dd>
+
+<dt><b>-d</b> </dt>
+<dd>Behave as if each regex had the <b>/D</b> (debug)
+modifier; the internal form is output after compilation. </dd>
+
+<dt><b>-i</b> </dt>
+<dd>Behave as if
+each regex had the <b>/I</b> modifier; information about the compiled pattern
+is given after compilation. </dd>
+
+<dt><b>-m</b> </dt>
+<dd>Output the size of each compiled pattern after
+it has been compiled. This is equivalent to adding <b>/M</b> to each regular expression.
+For compatibility with earlier versions of pcretest, <b>-s</b> is a synonym for
+<b>-m</b>. </dd>
+
+<dt><b>-o</b> <i>osize</i> </dt>
+<dd>Set the number of elements in the output vector that is used
+when calling <b>pcre_exec()</b> to be <i>osize</i>. The default value is 45, which is
+enough for 14 capturing subexpressions. The vector size can be changed for
+individual matching calls by including \O in the data line (see below). </dd>
+
+<dt><b>-p</b>
+</dt>
+<dd>Behave as if each regex has the <b>/P</b> modifier; the POSIX wrapper API is used
+to call PCRE. None of the other options has any effect when <b>-p</b> is set. </dd>
+
+<dt><b>-t</b> </dt>
+<dd>Run
+each compile, study, and match many times with a timer, and output resulting
+time per compile or match (in milliseconds). Do not set <b>-m</b> with <b>-t</b>, because
+you will then get the size output a zillion times, and the timing will
+be distorted. </dd>
+</dl>
+
+<h2><a name='sect3' href='#toc3'>Description</a></h2>
+ <p>
+If <b>pcretest</b> is given two filename arguments,
+it reads from the first and writes to the second. If it is given only one
+filename argument, it reads from that file and writes to stdout. Otherwise,
+it reads from stdin and writes to stdout, and prompts for each line of
+input, using "re&gt;" to prompt for regular expressions, and "data&gt;" to prompt
+for data lines. <p>
+The program handles any number of sets of input on a single
+input file. Each set starts with a regular expression, and continues with
+any number of data lines to be matched against the pattern. <p>
+Each data line
+is matched separately and independently. If you want to do multiple-line
+matches, you have to use the \n escape sequence in a single line of input
+to encode the newline characters. The maximum length of a data line is 30,000
+characters. <p>
+An empty line signals the end of the data lines, at which point
+a new regular expression is read. The regular expressions are given enclosed
+in any non-alphanumeric delimiters other than backslash, for example <p>
+ /(a|bc)x+yz/<br>
+ <p>
+White space before the initial delimiter is ignored. A regular expression
+may be continued over several input lines, in which case the newline characters
+are included within it. It is possible to include the delimiter within the
+pattern by escaping it, for example <p>
+ /abc\/def/<br>
+ <p>
+If you do so, the escape and the delimiter form part of the pattern, but
+since delimiters are always non-alphanumeric, this does not affect its interpretation.
+If the terminating delimiter is immediately followed by a backslash, for
+example, <p>
+ /abc/\<br>
+ <p>
+then a backslash is added to the end of the pattern. This is done to provide
+a way of testing the error condition that arises if a pattern finishes
+with a backslash, because <p>
+ /abc\/<br>
+ <p>
+is interpreted as the first line of a pattern that starts with "abc/",
+causing pcretest to read the next line as a continuation of the regular
+expression.
+<h2><a name='sect4' href='#toc4'>Pattern Modifiers</a></h2>
+ <p>
+A pattern may be followed by any number
+of modifiers, which are mostly single characters. Following Perl usage,
+these are referred to below as, for example, "the <b>/i</b> modifier", even though
+the delimiter of the pattern need not always be a slash, and no slash is
+used when writing modifiers. Whitespace may appear between the final pattern
+delimiter and the first modifier, and between the modifiers themselves.
+<p>
+The <b>/i</b>, <b>/m</b>, <b>/s</b>, and <b>/x</b> modifiers set the PCRE_CASELESS, PCRE_MULTILINE,
+PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when <b>pcre_compile()</b>
+is called. These four modifier letters have the same effect as they do in
+Perl. For example: <p>
+ /caseless/i<br>
+ <p>
+The following table shows additional modifiers for setting PCRE options
+that do not correspond to anything in Perl: <p>
+ <b>/A</b> PCRE_ANCHORED<br>
+ <b>/C</b> PCRE_AUTO_CALLOUT<br>
+ <b>/E</b> PCRE_DOLLAR_ENDONLY<br>
+ <b>/N</b> PCRE_NO_AUTO_CAPTURE<br>
+ <b>/U</b> PCRE_UNGREEDY<br>
+ <b>/X</b> PCRE_EXTRA<br>
+ <p>
+Searching for all possible matches within each subject string can be requested
+by the <b>/g</b> or <b>/G</b> modifier. After finding a match, PCRE is called again to
+search the remainder of the subject string. The difference between <b>/g</b> and
+<b>/G</b> is that the former uses the <i>startoffset</i> argument to <b>pcre_exec()</b> to start
+searching at a new point within the entire string (which is in effect what
+Perl does), whereas the latter passes over a shortened substring. This makes
+a difference to the matching process if the pattern begins with a lookbehind
+assertion (including \b or \B). <p>
+If any call to <b>pcre_exec()</b> in a <b>/g</b> or <b>/G</b> sequence
+matches an empty string, the next call is done with the PCRE_NOTEMPTY and
+PCRE_ANCHORED flags set in order to search for another, non-empty, match
+at the same point. If this second match fails, the start offset is advanced
+by one, and the normal match is retried. This imitates the way Perl handles
+such cases when using the <b>/g</b> modifier or the <b>split()</b> function. <p>
+There are
+yet more modifiers for controlling the way <b>pcretest</b> operates. <p>
+The <b>/+</b> modifier
+requests that as well as outputting the substring that matched the entire
+pattern, pcretest should in addition output the remainder of the subject
+string. This is useful for tests where the subject contains multiple copies
+of the same substring. <p>
+The <b>/L</b> modifier must be followed directly by the
+name of a locale, for example, <p>
+ /pattern/Lfr_FR<br>
+ <p>
+For this reason, it must be the last modifier. The given locale is set,
+<b>pcre_maketables()</b> is called to build a set of character tables for the
+locale, and this is then passed to <b>pcre_compile()</b> when compiling the regular
+expression. Without an <b>/L</b> modifier, NULL is passed as the tables pointer;
+that is, <b>/L</b> applies only to the expression on which it appears. <p>
+The <b>/I</b> modifier
+requests that <b>pcretest</b> output information about the compiled pattern (whether
+it is anchored, has a fixed first character, and so on). It does this by
+calling <b>pcre_fullinfo()</b> after compiling a pattern. If the pattern is studied,
+the results of that are also output. <p>
+The <b>/D</b> modifier is a PCRE debugging
+feature, which also assumes <b>/I</b>. It causes the internal form of compiled
+regular expressions to be output after compilation. If the pattern was studied,
+the information returned is also output. <p>
+The <b>/F</b> modifier causes <b>pcretest</b>
+to flip the byte order of the fields in the compiled pattern that contain
+2-byte and 4-byte numbers. This facility is for testing the feature in PCRE
+that allows it to execute patterns that were compiled on a host with a
+different endianness. This feature is not available when the POSIX interface
+to PCRE is being used, that is, when the <b>/P</b> pattern modifier is specified.
+See also the section about saving and reloading compiled patterns below.
+<p>
+The <b>/S</b> modifier causes <b>pcre_study()</b> to be called after the expression has
+been compiled, and the results used when the expression is matched. <p>
+The
+<b>/M</b> modifier causes the size of memory block used to hold the compiled pattern
+to be output. <p>
+The <b>/P</b> modifier causes <b>pcretest</b> to call PCRE via the POSIX
+wrapper API rather than its native API. When this is done, all other modifiers
+except <b>/i</b>, <b>/m</b>, and <b>/+</b> are ignored. REG_ICASE is set if <b>/i</b> is present, and
+REG_NEWLINE is set if <b>/m</b> is present. The wrapper functions force PCRE_DOLLAR_ENDONLY
+always, and PCRE_DOTALL unless REG_NEWLINE is set. <p>
+The <b>/8</b> modifier causes
+<b>pcretest</b> to call PCRE with the PCRE_UTF8 option set. This turns on support
+for UTF-8 character handling in PCRE, provided that it was compiled with
+this support enabled. This modifier also causes any non-printing characters
+in output strings to be printed using the \x{hh...} notation if they are valid
+UTF-8 sequences. <p>
+If the <b>/?</b> modifier is used with <b>/8</b>, it causes <b>pcretest</b> to
+call <b>pcre_compile()</b> with the PCRE_NO_UTF8_CHECK option, to suppress the
+checking of the string for UTF-8 validity.
+<h2><a name='sect5' href='#toc5'>Data Lines</a></h2>
+ <p>
+Before each data
+line is passed to <b>pcre_exec()</b>, leading and trailing whitespace is removed,
+and it is then scanned for \ escapes. Some of these are pretty esoteric features,
+intended for checking out some of the more complicated features of PCRE.
+If you are just testing "ordinary" regular expressions, you probably don&rsquo;t
+need any of these. The following escapes are recognized: <p>
+ \a alarm
+(= BEL)<br>
+ \b backspace<br>
+ \e escape<br>
+ \f formfeed<br>
+ \n newline<br>
+ \r carriage return<br>
+ \t tab<br>
+ \v vertical tab<br>
+ \nnn octal character (up to 3 octal digits)<br>
+ \xhh hexadecimal character (up to 2 hex digits)<br>
+ \x{hh...} hexadecimal character, any number of digits<br>
+ in UTF-8 mode<br>
+ \A pass the PCRE_ANCHORED option to <b>pcre_exec()</b><br>
+ \B pass the PCRE_NOTBOL option to <b>pcre_exec()</b><br>
+ \Cdd call pcre_copy_substring() for substring dd<br>
+ after a successful match (number less than 32)<br>
+ \Cname call pcre_copy_named_substring() for substring<br>
+ "name" after a successful match (name termin-<br>
+ ated by next non-alphanumeric character)<br>
+ \C+ show the current captured substrings at callout<br>
+ time<br>
+ \C- do not supply a callout function<br>
+ \C!n return 1 instead of 0 when callout number n is<br>
+ reached<br>
+ \C!n!m return 1 instead of 0 when callout number n is<br>
+ reached for the mth time<br>
+ \C*n pass the number n (may be negative) as callout<br>
+ data; this is used as the callout return value<br>
+ \Gdd call pcre_get_substring() for substring dd<br>
+ after a successful match (number less than 32)<br>
+ \Gname call pcre_get_named_substring() for substring<br>
+ "name" after a successful match (name termin-<br>
+ ated by next non-alphanumeric character)<br>
+ \L call pcre_get_substring_list() after a<br>
+ successful match<br>
+ \M discover the minimum MATCH_LIMIT setting<br>
+ \N pass the PCRE_NOTEMPTY option to <b>pcre_exec()</b><br>
+ \Odd set the size of the output vector passed to<br>
+ <b>pcre_exec()</b> to dd (any number of digits)<br>
+ \P pass the PCRE_PARTIAL option to <b>pcre_exec()</b><br>
+ \S output details of memory get/free calls during matching<br>
+ \Z pass the PCRE_NOTEOL option to <b>pcre_exec()</b><br>
+ \? pass the PCRE_NO_UTF8_CHECK option to<br>
+ <b>pcre_exec()</b><br>
+ \&gt;dd start the match at offset dd (any number of digits);<br>
+ this sets the <i>startoffset</i> argument for <b>pcre_exec()</b><br>
+ <p>
+A backslash followed by anything else just escapes the anything else. If
+the very last character is a backslash, it is ignored. This gives a way
+of passing an empty line as data, since a real empty line terminates the
+data input. <p>
+If \M is present, <b>pcretest</b> calls <b>pcre_exec()</b> several times, with
+different values in the <i>match_limit</i> field of the <b>pcre_extra</b> data structure,
+until it finds the minimum number that is needed for <b>pcre_exec()</b> to complete.
+This number is a measure of the amount of recursion and backtracking that
+takes place, and checking it out can be instructive. For most simple matches,
+the number is quite small, but for patterns with very large numbers of
+matching possibilities, it can become large very quickly with increasing
+length of subject string. <p>
+When \O is used, the value specified may be higher
+or lower than the size set by the <b>-o</b> command line option (or defaulted to
+45); \O applies only to the call of <b>pcre_exec()</b> for the line in which it
+appears. <p>
+If the <b>/P</b> modifier was present on the pattern, causing the POSIX
+wrapper API to be used, only \B and \Z have any effect, causing REG_NOTBOL
+and REG_NOTEOL to be passed to <b>regexec()</b> respectively. <p>
+The use of \x{hh...}
+to represent UTF-8 characters is not dependent on the use of the <b>/8</b> modifier
+on the pattern. It is recognized always. There may be any number of hexadecimal
+digits inside the braces. The result is from one to six bytes, encoded according
+to the UTF-8 rules.
+<h2><a name='sect6' href='#toc6'>Output from Pcretest</a></h2>
+ <p>
+When a match succeeds, pcretest
+outputs the list of captured substrings that <b>pcre_exec()</b> returns, starting
+with number 0 for the string that matched the whole pattern. Otherwise,
+it outputs "No match" or "Partial match" when <b>pcre_exec()</b> returns PCRE_ERROR_NOMATCH
+or PCRE_ERROR_PARTIAL, respectively, and otherwise the PCRE negative error
+number. Here is an example of an interactive pcretest run. <p>
+ $ pcretest<br>
+ PCRE version 5.00 07-Sep-2004<br>
+ <p>
+ re&gt; /^abc(\d+)/<br>
+ data&gt; abc123<br>
+ 0: abc123<br>
+ 1: 123<br>
+ data&gt; xyz<br>
+ No match<br>
+ <p>
+If the strings contain any non-printing characters, they are output as
+\xhh escapes, or as \x{...} escapes if the <b>/8</b> modifier was present on the pattern.
+If the pattern has the <b>/+</b> modifier, the output for substring 0 is followed
+by the rest of the subject string, identified by "0+" like this: <p>
+
+ re&gt; /cat/+<br>
+ data&gt; cataract<br>
+ 0: cat<br>
+ 0+ aract<br>
+ <p>
+If the pattern has the <b>/g</b> or <b>/G</b> modifier, the results of successive matching
+attempts are output in sequence, like this: <p>
+ re&gt; /\Bi(\w\w)/g<br>
+ data&gt; Mississippi<br>
+ 0: iss<br>
+ 1: ss<br>
+ 0: iss<br>
+ 1: ss<br>
+ 0: ipp<br>
+ 1: pp<br>
+ <p>
+"No match" is output only if the first match attempt fails. <p>
+If any of the
+sequences <b>\C</b>, <b>\G</b>, or <b>\L</b> are present in a data line that is successfully matched,
+the substrings extracted by the convenience functions are output with C,
+G, or L after the string number instead of a colon. This is in addition
+to the normal full list. The string length (that is, the return from the
+extraction function) is given in parentheses after each string for <b>\C</b> and
+<b>\G</b>. <p>
+Note that while patterns can be continued over several lines (a plain
+"&gt;" prompt is used for continuations), data lines may not. However newlines
+can be included in data by means of the \n escape.
+<h2><a name='sect7' href='#toc7'>Callouts</a></h2>
+ <p>
+If the pattern
+contains any callout requests, <b>pcretest</b>&rsquo;s callout function is called during
+matching. By default, it displays the callout number, the start and current
+positions in the text at the callout time, and the next pattern item to
+be tested. For example, the output <p>
+ ---&gt;pqrabcdef<br>
+ 0 ^ ^ \d<br>
+ <p>
+indicates that callout number 0 occurred for a match attempt starting
+at the fourth character of the subject string, when the pointer was at
+the seventh character of the data, and when the next pattern item was \d.
+Just one circumflex is output if the start and current positions are the
+same. <p>
+Callouts numbered 255 are assumed to be automatic callouts, inserted
+as a result of the <b>/C</b> pattern modifier. In this case, instead of showing
+the callout number, the offset in the pattern, preceded by a plus, is output.
+For example: <p>
+ re&gt; /\d?[A-E]\*/C<br>
+ data&gt; E*<br>
+ ---&gt;E*<br>
+ +0 ^ \d?<br>
+ +3 ^ [A-E]<br>
+ +8 ^^ \*<br>
+ +10 ^ ^<br>
+ 0: E*<br>
+ <p>
+The callout function in <b>pcretest</b> returns zero (carry on matching) by default,
+but you can use an \C item in a data line (as described above) to change
+this. <p>
+Inserting callouts can be helpful when using <b>pcretest</b> to check complicated
+regular expressions. For further information about callouts, see the <b>pcrecallout</b>
+ documentation.
+<h2><a name='sect8' href='#toc8'>Saving and Reloading Compiled Patterns</a></h2>
+ <p>
+The facilities
+described in this section are not available when the POSIX interface to
+PCRE is being used, that is, when the <b>/P</b> pattern modifier is specified.
+<p>
+When the POSIX interface is not in use, you can cause <b>pcretest</b> to write
+a compiled pattern to a file, by following the modifiers with &gt; and a file
+name. For example: <p>
+ /pattern/im &gt;/some/file<br>
+ <p>
+See the <b>pcreprecompile</b> documentation for a discussion about saving and
+re-using compiled patterns. <p>
+The data that is written is binary. The first
+eight bytes are the length of the compiled pattern data followed by the
+length of the optional study data, each written as four bytes in big-endian
+order (most significant byte first). If there is no study data (either the
+pattern was not studied, or studying did not return any data), the second
+length is zero. The lengths are followed by an exact copy of the compiled
+pattern. If there is additional study data, this follows immediately after
+the compiled pattern. After writing the file, <b>pcretest</b> expects to read a
+new pattern. <p>
+A saved pattern can be reloaded into <b>pcretest</b> by specifying
+&lt; and a file name instead of a pattern. The name of the file must not contain
+a &lt; character, as otherwise <b>pcretest</b> will interpret the line as a pattern
+delimited by &lt; characters. For example: <p>
+ re&gt; &lt;/some/file<br>
+ Compiled regex loaded from /some/file<br>
+ No study data<br>
+ <p>
+When the pattern has been loaded, <b>pcretest</b> proceeds to read data lines
+in the usual way. <p>
+You can copy a file written by <b>pcretest</b> to a different
+host and reload it there, even if the new host has opposite endianness
+to the one on which the pattern was compiled. For example, you can compile
+on an i86 machine and run on a SPARC machine. <p>
+File names for saving and
+reloading can be absolute or relative, but note that the shell facility
+of expanding a file name that starts with a tilde (~) is not available.
+<p>
+The ability to save and reload files in <b>pcretest</b> is intended for testing
+and experimentation. It is not intended for production use because only
+a single pattern can be written to a file. Furthermore, there is no facility
+for supplying custom character tables for use with a reloaded pattern. If
+the original pattern was compiled with custom tables, an attempt to match
+a subject string using a reloaded pattern is likely to cause <b>pcretest</b> to
+crash. Finally, if you attempt to load a file that is not in the correct
+format, the result is undefined.
+<h2><a name='sect9' href='#toc9'>Author</a></h2>
+ <p>
+Philip Hazel &lt;ph10@cam.ac.uk&gt; <br>
+University Computing Service, <br>
+Cambridge CB2 3QG, England. <p>
+ Last updated: 10 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Options</a></li>
+<li><a name='toc3' href='#sect3'>Description</a></li>
+<li><a name='toc4' href='#sect4'>Pattern Modifiers</a></li>
+<li><a name='toc5' href='#sect5'>Data Lines</a></li>
+<li><a name='toc6' href='#sect6'>Output from Pcretest</a></li>
+<li><a name='toc7' href='#sect7'>Callouts</a></li>
+<li><a name='toc8' href='#sect8'>Saving and Reloading Compiled Patterns</a></li>
+<li><a name='toc9' href='#sect9'>Author</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.mft b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.mft
new file mode 100644
index 0000000..a2ac5c9
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.mft
@@ -0,0 +1,58 @@
+man/html/pcre.3.html
+man/html/pcre_compile.3.html
+man/html/pcre_config.3.html
+man/html/pcre_copy_named_substring.3.html
+man/html/pcre_copy_substring.3.html
+man/html/pcre_exec.3.html
+man/html/pcre_free_substring.3.html
+man/html/pcre_free_substring_list.3.html
+man/html/pcre_fullinfo.3.html
+man/html/pcre_get_named_substring.3.html
+man/html/pcre_get_stringnumber.3.html
+man/html/pcre_get_substring.3.html
+man/html/pcre_get_substring_list.3.html
+man/html/pcre_info.3.html
+man/html/pcre_maketables.3.html
+man/html/pcre_study.3.html
+man/html/pcre_version.3.html
+man/html/pcreapi.3.html
+man/html/pcrebuild.3.html
+man/html/pcrecallout.3.html
+man/html/pcrecompat.3.html
+man/html/pcregrep.1.html
+man/html/pcrepattern.3.html
+man/html/pcreperform.3.html
+man/html/pcreposix.3.html
+man/html/pcresample.3.html
+man/html/pcretest.1.html
+man/man1/pcregrep.1
+man/man1/pcretest.1
+man/man3/pcre.3
+man/man3/pcre_compile.3
+man/man3/pcre_config.3
+man/man3/pcre_copy_named_substring.3
+man/man3/pcre_copy_substring.3
+man/man3/pcre_exec.3
+man/man3/pcre_free_substring.3
+man/man3/pcre_free_substring_list.3
+man/man3/pcre_fullinfo.3
+man/man3/pcre_get_named_substring.3
+man/man3/pcre_get_stringnumber.3
+man/man3/pcre_get_substring.3
+man/man3/pcre_get_substring_list.3
+man/man3/pcre_info.3
+man/man3/pcre_maketables.3
+man/man3/pcre_study.3
+man/man3/pcre_version.3
+man/man3/pcreapi.3
+man/man3/pcrebuild.3
+man/man3/pcrecallout.3
+man/man3/pcrecompat.3
+man/man3/pcrepattern.3
+man/man3/pcreperform.3
+man/man3/pcreposix.3
+man/man3/pcresample.3
+man/pdf/pcre-man.pdf
+man/ps/pcre-man.ps.gz
+manifest/pcre-5.0-doc.mft
+manifest/pcre-5.0-doc.ver
diff --git a/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.ver b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.ver
new file mode 100644
index 0000000..6a4adaa
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.ver
@@ -0,0 +1,2 @@
+Pcre 5.0: Documentation
+Pcre: Perl-compatible regular-expression library
diff --git a/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.mft b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.mft
new file mode 100644
index 0000000..aabb128
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.mft
@@ -0,0 +1,12 @@
+include/pcre.h
+include/pcreposix.h
+lib/libpcre-bcc.lib
+lib/libpcre.def
+lib/libpcre.dll.a
+lib/libpcre.lib
+lib/libpcreposix-bcc.lib
+lib/libpcreposix.def
+lib/libpcreposix.dll.a
+lib/libpcreposix.lib
+manifest/pcre-5.0-lib.mft
+manifest/pcre-5.0-lib.ver
diff --git a/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.ver b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.ver
new file mode 100644
index 0000000..288da85
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.ver
@@ -0,0 +1,2 @@
+Pcre 5.0: Developer files
+Pcre: Perl-compatible regular-expression library
diff --git a/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.3 b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.3
new file mode 100644
index 0000000..b3c269b
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.3
@@ -0,0 +1,73 @@
+.TH PCRE 3
+.SH NAME
+pcre_subst - Perl-compatible regular expression substitution.
+.SH SYNOPSIS
+.B #include <pcre.h>
+.br
+.B #include <pcre_subst.h>
+.PP
+.SM
+.br
+char *pcre_subst(const pcre *\fIcode\fR, const pcre_extra *\fIextra\fR,
+.ti +5n
+const char *\fIsubject\fR, int \fIlength\fR, int \fIstartoffset\fR,
+.ti +5n
+int \fIoptions\fR, const char *\fIreplacement\fR);
+
+
+
+.SH DESCRIPTION
+\fBpcre_subst\fR is a convenience routine that calls \fIpcre_exec\fR,
+and returns a freshly allocated string based on the \fIsubject\fR with
+the \fIreplacement\fR action applied. Unlike \fIsubject\fR, which is
+passed as a byte array with a length (most users will just pass
+\fIstrlen(subject)\fR as the \fIlength\fR), \fIreplacement\fR is expected
+to be a zero terminated string.
+
+.br
+If no match is found, pcre_subst returns NULL. The returned string is zero
+terminated (note that \fIsubject\fR doesn't have to be). For information
+on the \fIcode\fR, \fIextra\fR, \fIsubject\fR, \fIlength\fR,
+\fIstartoffset\fR and \fIoptions\fR parameters, please see \fBpcre(3)\fR.
+
+.SH REPLACEMENT STRING
+The replacement string supports a subset of the PERL replacement string.
+In particular, \\1 style escapes are not supported (actually, only the
+$1 style is handled).
+
+.SH EXAMPLE
+.Bd -literal -compact
+#include <stdio.h>
+#include <pcre.h>
+#include "pcre_subst.h"
+
+int
+main()
+{
+ char *pat = "quick\\\\s(\\\\w+)\\\\s(fox)";
+ char *rep = "$1ish $2";
+ char *str = "The quick brown foxy";
+ char *newstr;
+ const char *err;
+ int erroff;
+ pcre_extra *extra;
+ pcre *p = pcre_compile(pat, 0, &err, &erroff, NULL);
+ if (p == NULL) {
+ fprintf(stderr, "%s at %d\\n", err, erroff);
+ exit(1);
+ }
+ extra = pcre_study(p, 0, &err);
+ if (err != NULL)
+ fprintf(stderr, "Study %s: %s\\n", pat, err);
+ newstr = pcre_subst(p, extra, str, strlen(str),
+ 0, 0, rep);
+ if (newstr) {
+ printf("New string: %s\\n", newstr);
+ pcre_free(newstr);
+ };
+ return 0;
+}
+.Ed
+
+.SH SEE ALSO
+pcre(3)
diff --git a/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.c b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.c
new file mode 100644
index 0000000..3f34f97
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.c
@@ -0,0 +1,191 @@
+/*************************************************
+* PCRE string replacement *
+*************************************************/
+
+/*
+PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+pcre_subst is a wrapper around pcre_exec designed to make it easier to
+perform PERL style replacements with PCRE.
+
+Written by: Bert Driehuis <driehuis@playbeing.org>
+
+ Copyright (c) 2000 Bert Driehuis
+
+-----------------------------------------------------------------------------
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+4. If PCRE is embedded in any software that is released under the GNU
+ General Purpose Licence (GPL), then the terms of that licence shall
+ supersede any condition above with which it is incompatible.
+*/
+
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pcre.h>
+#include "pcre_subst.h"
+
+#define MAXCAPTURE 50
+
+#ifdef DEBUG_PCRE_SUBST
+/*
+ * Debug helper: print one line as wide as the subject, echoing the bytes
+ * whose offset lies in [start, end) and a '-' for every other position.
+ *
+ *   str         subject bytes (only the first len bytes are read)
+ *   len         number of bytes in str
+ *   start, end  half-open range of offsets to print verbatim
+ */
+static void
+dumpstr(const char *str, int len, int start, int end)
+{
+    int i;
+
+    /*
+     * Walk len bytes instead of strlen(str): the subject is passed around
+     * as a counted byte array, and calling strlen() in the loop condition
+     * rescanned the whole string on every iteration.
+     */
+    for (i = 0; i < len; i++)
+        putchar((i >= start && i < end) ? str[i] : '-');
+    putchar('\n');
+}
+
+/*
+ * Debug helper: dump the subject, the raw offset vector returned by
+ * pcre_exec() and one dumpstr() line per matched pair.
+ *
+ *   str   subject bytes (only the first len bytes are printed)
+ *   len   number of bytes in str
+ *   rep   replacement template (currently unused, kept for symmetry)
+ *   nmat  return value of pcre_exec(); nothing is listed when it is <= 0
+ *   ovec  offset vector, read as nmat (start, end) pairs
+ */
+static void
+dumpmatch(const char *str, int len, const char *rep, int nmat, const int *ovec)
+{
+    int i;
+
+    (void)rep;
+
+    /* "%.*s" bounds the output by len; plain "%s" needed a terminating NUL. */
+    printf("%.*s Input\n", len, str);
+    printf("nmat=%d", nmat);
+    for (i = 0; i < nmat * 2; i++)
+        printf(" %d", ovec[i]);
+    printf("\n");
+    for (i = 0; i < nmat; i++)
+        dumpstr(str, len, ovec[2 * i], ovec[2 * i + 1]);
+    printf("\n");
+}
+#endif
+
+/*
+ * Compute how many bytes the replacement template expands to.
+ *
+ *   rep     NUL-terminated template; "$<digits>" refers to a capture group,
+ *           every other character counts as one literal byte
+ *   nmat    number of valid entries in replen (capture groups 1..nmat)
+ *   replen  replen[k - 1] is the length of capture group k
+ *
+ * Returns the expanded length, excluding any terminating NUL. A reference
+ * outside 1..nmat contributes nothing and is reported on stderr.
+ */
+static int
+findreplen(const char *rep, int nmat, const int *replen)
+{
+    const char *cp = rep;
+    int len = 0;
+
+    while (*cp != '\0') {
+        /* isdigit() is only defined for unsigned char values and EOF. */
+        if (*cp == '$' && isdigit((unsigned char)cp[1])) {
+            char *end;
+            unsigned long val = strtoul(cp + 1, &end, 10);
+
+            cp = end;
+            /*
+             * Valid group numbers are 1..nmat. The previous bound of
+             * nmat + 1 read replen[nmat], which the caller never fills.
+             */
+            if (nmat > 0 && val >= 1 && val <= (unsigned long)nmat)
+                len += replen[val - 1];
+            else
+                fprintf(stderr, "repl %lu out of range\n", val);
+        } else {
+            cp++;
+            len++;
+        }
+    }
+    return len;
+}
+
+/*
+ * Expand the replacement template into out.
+ *
+ *   out     destination; the caller must provide at least as many bytes as
+ *           findreplen() reports for the same arguments (no NUL is written)
+ *   rep     NUL-terminated template; "$<digits>" inserts a capture group,
+ *           every other character is copied literally
+ *   nmat    number of valid entries in replen/repstr (groups 1..nmat)
+ *   replen  replen[k - 1] is the length of capture group k
+ *   repstr  repstr[k - 1] points at the first byte of capture group k
+ *
+ * References outside 1..nmat expand to nothing.
+ */
+static void
+doreplace(char *out, const char *rep, int nmat, int *replen, const char **repstr)
+{
+    const char *cp = rep;
+
+    while (*cp != '\0') {
+        /* isdigit() is only defined for unsigned char values and EOF. */
+        if (*cp == '$' && isdigit((unsigned char)cp[1])) {
+            char *end;
+            unsigned long val = strtoul(cp + 1, &end, 10);
+
+            cp = end;
+            /*
+             * Valid group numbers are 1..nmat; the previous bound of
+             * nmat + 1 used entries the caller never fills.
+             */
+            if (nmat > 0 && val >= 1 && val <= (unsigned long)nmat &&
+                replen[val - 1] > 0) {
+                /*
+                 * Captures are counted byte ranges, so copy them with
+                 * memcpy(); strncpy() would stop at an embedded NUL and
+                 * zero-fill the remainder.
+                 */
+                memcpy(out, repstr[val - 1], (size_t)replen[val - 1]);
+                out += replen[val - 1];
+            }
+        } else {
+            *out++ = *cp++;
+        }
+    }
+}
+
+/*
+ * Build the result string: the subject bytes before the match, the expanded
+ * replacement, and the subject bytes after the match.
+ *
+ *   str   subject bytes (need not be NUL-terminated)
+ *   len   number of bytes in str
+ *   rep   NUL-terminated replacement template
+ *   nmat  positive return value of pcre_exec() (whole match plus groups)
+ *   ovec  offset vector filled in by pcre_exec()
+ *
+ * Returns a NUL-terminated buffer obtained from pcre_malloc(), or NULL when
+ * the allocation fails.
+ */
+static char *
+edit(const char *str, int len, const char *rep, int nmat, const int *ovec)
+{
+    const int *mvec = ovec;             /* pair 0: the whole match */
+    const char *repstr[MAXCAPTURE];
+    int replen[MAXCAPTURE];
+    int slen = len;
+    int i, rlen, taillen;
+    char *res, *cp;
+
+    /* Skip pair 0 so that index i describes capture group i + 1. */
+    nmat--;
+    ovec += 2;
+    if (nmat > MAXCAPTURE)
+        nmat = MAXCAPTURE;
+    for (i = 0; i < nmat; i++) {
+        int so = ovec[i * 2];
+        int eo = ovec[i * 2 + 1];
+
+        if (so < 0 || eo < so) {
+            /* Negative offsets (unset group): expand to nothing rather
+             * than forming a pointer in front of str. */
+            replen[i] = 0;
+            repstr[i] = str;
+        } else {
+            replen[i] = eo - so;
+            repstr[i] = &str[so];
+        }
+#ifdef DEBUG_PCRE_SUBST
+        printf(">>>%d %d %.*s\n", i, replen[i], replen[i], repstr[i]);
+#endif
+    }
+
+    rlen = findreplen(rep, nmat, replen);
+    taillen = slen - mvec[1];
+    if (taillen < 0)
+        taillen = 0;
+    len = mvec[0] + rlen + taillen;
+#ifdef DEBUG_PCRE_SUBST
+    printf("resulting length %d (srclen=%d)\n", len, slen);
+#endif
+
+    res = pcre_malloc(len + 1);
+    if (res == NULL)
+        return NULL;
+    cp = res;
+    if (mvec[0] > 0) {
+        memcpy(cp, str, (size_t)mvec[0]);
+        cp += mvec[0];
+    }
+    doreplace(cp, rep, nmat, replen, repstr);
+    cp += rlen;
+    /*
+     * Copy exactly the bytes that remain inside the subject. strcpy() ran
+     * on to the next NUL, which overflows the buffer when the subject is
+     * longer than len or is not terminated at all.
+     */
+    if (taillen > 0)
+        memcpy(cp, &str[mvec[1]], (size_t)taillen);
+    res[len] = 0;
+    return res;
+}
+
+/*
+ * Match a compiled pattern against a subject and return a copy of the
+ * subject with the matched part replaced.
+ *
+ *   ppat     compiled pattern, passed to pcre_exec()
+ *   extra    study data, passed to pcre_exec()
+ *   str      subject bytes
+ *   len      number of bytes in str
+ *   offset   start offset, passed to pcre_exec()
+ *   options  option bits, passed to pcre_exec()
+ *   rep      NUL-terminated replacement template ("$1" style references)
+ *
+ * Returns the string built by edit(), or NULL when pcre_exec() returns a
+ * value <= 0 (no match, an error, or an offset vector that is too small).
+ */
+char *
+pcre_subst(const pcre *ppat, const pcre_extra *extra, const char *str, int len,
+    int offset, int options, const char *rep)
+{
+    int ovec[MAXCAPTURE * 3];
+    int nmat;
+
+    /*
+     * pcre_exec() takes the number of ints in the vector, not its size in
+     * bytes; passing sizeof(ovec) let it write past the end of the array.
+     */
+    nmat = pcre_exec(ppat, extra, str, len, offset, options,
+        ovec, (int)(sizeof(ovec) / sizeof(ovec[0])));
+#ifdef DEBUG_PCRE_SUBST
+    dumpmatch(str, len, rep, nmat, ovec);
+#endif
+    if (nmat <= 0)
+        return NULL;
+    return edit(str, len, rep, nmat, ovec);
+}
+
+#ifdef DEBUG_BUILD
+/*
+ * Self-test driver (DEBUG_BUILD only): compile a sample pattern, run one
+ * substitution on a fixed subject and print the result.
+ *
+ * Returns 1 when the pattern does not compile, 0 otherwise.
+ */
+int
+main(void)
+{
+    const char *pat = "quick\\s(\\w+)\\s(fox)";
+    const char *rep = "$1ish $2";
+    const char *str = "The quick brown foxy";
+    const char *err;
+    int erroffset;
+    pcre_extra *extra;
+    char *newstr;
+    pcre *ppat;
+
+    ppat = pcre_compile(pat, 0, &err, &erroffset, NULL);
+    if (ppat == NULL) {
+        fprintf(stderr, "%s at %d\n", err, erroffset);
+        return 1;
+    }
+
+    /* A failed study is reported but not fatal; extra is passed on as is. */
+    extra = pcre_study(ppat, 0, &err);
+    if (err != NULL)
+        fprintf(stderr, "Study %s failed: %s\n", pat, err);
+
+    newstr = pcre_subst(ppat, extra, str, (int)strlen(str), 0, 0, rep);
+    if (newstr != NULL) {
+        printf("Newstr\t%s\n", newstr);
+        pcre_free(newstr);
+    } else {
+        printf("No match\n");
+    }
+
+    /* Release the study data and the compiled pattern before exiting. */
+    if (extra != NULL)
+        pcre_free(extra);
+    pcre_free(ppat);
+    return 0;
+}
+#endif
diff --git a/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.h b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.h
new file mode 100644
index 0000000..e4f4c44
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.h
@@ -0,0 +1,35 @@
+/*************************************************
+* PCRE string replacement *
+*************************************************/
+
+/*
+PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+pcre_subst is a wrapper around pcre_exec designed to make it easier to
+perform PERL style replacements with PCRE.
+
+Written by: Bert Driehuis <driehuis@playbeing.org>
+
+ Copyright (c) 2000 Bert Driehuis
+
+-----------------------------------------------------------------------------
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+4. If PCRE is embedded in any software that is released under the GNU
+ General Purpose Licence (GPL), then the terms of that licence shall
+ supersede any condition above with which it is incompatible.
+*/
+
+char *pcre_subst(const pcre *, const pcre_extra *, const char *, int, int, int, const char *);
diff --git a/spamfilter/Utilities/UnicoWS/license.txt b/spamfilter/Utilities/UnicoWS/license.txt
new file mode 100644
index 0000000..d2cd6d2
--- /dev/null
+++ b/spamfilter/Utilities/UnicoWS/license.txt
@@ -0,0 +1,66 @@
+END-USER LICENSE AGREEMENT FOR MICROSOFT SOFTWARE:
+Microsoft Layer for Unicode on Windows 95, 98, and Me Systems,
+Version No. 1.1.3790.0
+
+IMPORTANT—READ CAREFULLY: This End-User License Agreement (“EULA”) is a legal agreement between you (either an individual or a single entity) and Microsoft Corporation for the Microsoft software product identified above, which includes computer software and may include associated media, printed materials, “online” or electronic documentation, and Internet-based services (“Product”). An amendment or addendum to this EULA may accompany the Product. YOU AGREE TO BE BOUND BY THE TERMS OF THIS EULA BY INSTALLING, COPYING, OR OTHERWISE USING THE PRODUCT. IF YOU DO NOT AGREE, DO NOT INSTALL, COPY, OR USE THE PRODUCT; YOU MAY RETURN IT TO YOUR PLACE OF PURCHASE FOR A FULL REFUND, IF APPLICABLE.
+SOFTWARE PRODUCT LICENSE
+
+1. GRANTS OF LICENSE. Microsoft grants you the rights described in this EULA provided that you comply with all terms and conditions of this EULA.
+ 1.1 General License Grant. You may install and use an unlimited number of copies of the Product on computers, including workstations, terminals or other digital electronic devices residing on your premises ("Computers") to design, develop, and test your software application(s) ("Licensee Products") for use with any version or edition of Microsoft Windows 95, Windows 98, Windows NT 4.0, Windows 2000 operating system products and/or any version or edition of any Microsoft operating system product that is a successor to the foregoing and/or any Microsoft product suite that contains any of the foregoing (each a "Microsoft Operating System).
+1.2 Documentation. You may make and use an unlimited number of copies of any documentation, provided that such copies shall be used only for personal purposes and are not to be republished or distributed (either in hard copy or electronic form) beyond your premises.
+1.3 Storage/Network Use. You may also store or install a copy of the Product on a storage device, such as a network server, used only to install or run the Product on computers used by a licensed end user in accordance with Section 1.1. A single license for the Product may not be shared or used concurrently by multiple end users.
+2. ADDITIONAL LICENSE RIGHTS -- REDISTRIBUTABLE CODE. In addition to the rights granted in Section 1, certain portions of the Product, as described in this Section 2, are provided to you with additional license rights. These additional license rights are conditioned upon your compliance with the distribution requirements and license restrictions described in Section 3.
+2.1 Sample Code. Microsoft grants you the right to use and modify the source code version of those portions of the Product identified as “Samples” in REDIST.TXT or elsewhere in the Product (“Sample Code”) for the sole purposes of designing, developing, and testing your product(s), and to reproduce and distribute the Sample Code, along with any modifications thereof, in object and/or source code form. For applicable redistribution requirements for Sample Code, see Section 3.1 below.
+2.2 Redistributable Code—General. Microsoft grants you a nonexclusive, royalty-free right to reproduce and distribute the object code form of any portion of the Product listed in REDIST.TXT (“Redistributable Code”). For general redistribution requirements for Redistributable Code, see Section 3.1, below.
+3. LICENSE RESTRICTIONS -- DISTRIBUTION REQUIREMENTS. If you choose to exercise your rights under Section 2, any redistribution by you is subject to your compliance with the following terms.
+3.1 If you are authorized and choose to redistribute Sample Code, or Redistributable Code (collectively, the “Redistributables”) as described in Section 2, you agree: (i) except as otherwise noted in Section 2.1 (Sample Code), to distribute the Redistributables only in object code form and in conjunction with and as a part of the Licensee Products developed by you that adds significant and primary functionality to the Redistributables; (ii) that the Redistributables only operate in conjunction with Microsoft Windows platforms; (iii) to distribute the Licensee Product containing the Redistributables pursuant to an end user license agreement (which may be “break-the-seal”, “click-wrap” or signed), with terms no less protective than those contained in this EULA; (iv) not to use Microsoft’s name, logo, or trademarks to market the Licensee Product; (v) to display your own valid copyright notice which shall be sufficient to protect Microsoft’s copyright in the Product; (vi) not to remove or obscure any copyright, trademark or patent notices that appear on the Product as delivered to you; (vii) to indemnify, hold harmless, and defend Microsoft from and against any claims or lawsuits, including attorney’s fees, that arise or result from the use or distribution of the Licensee Product; (viii) otherwise comply with the terms of this EULA; and (ix) agree that Microsoft reserves all rights not expressly granted.
+You also agree not to permit further distribution of the Redistributables by your end users except you may permit further redistribution of the Redistributables by your distributors to your end-user customers if your distributors only distribute the Redistributables in conjunction with, and as part of, the Licensee Product and you and your distributors comply with all other terms of this EULA.
+3.2 If you use the Redistributables, then in addition to your compliance with the applicable distribution requirements described for the Redistributables, the following also applies. Your license rights to the Redistributables are conditioned upon your (i) not incorporating Identified Product into or combining Identified Product with the Redistributables or a derivative work thereof; (ii) not distributing Identified Product in conjunction with the Redistributables or a derivative work thereof; and (iii) not using Identified Product in the development of a derivative work of the Redistributables. “Identified Product” means Product which is licensed pursuant to terms that directly or indirectly (A) create, or purport to create, obligations for Microsoft with respect to the Redistributables or derivative work thereof or (B) grant, or purport to grant, to any third party any rights or immunities under Microsoft’s intellectual property or proprietary rights in the Redistributables or derivative work thereof. Identified Product includes, without limitation, any Product that requires as a condition of its use, modification and/or distribution, that any other Product incorporated into, derived from or distributed with such Product must also be (1) disclosed or distributed in source code form; (2) licensed for the purpose of making derivative works; or (3) redistributable at no charge.
+4. DESCRIPTION OF OTHER RIGHTS AND LIMITATIONS
+5. RESERVATION OF RIGHTS. Microsoft reserves all rights not expressly granted to you in this EULA.
+6. UPGRADES. To use a Product identified as an upgrade, you must first be licensed for the product identified by Microsoft as eligible for the upgrade. After upgrading, you may no longer use the product that formed the basis for your upgrade eligibility.You may use the resulting upgraded product only in accordance with the terms of this EULA. If the Product is an upgrade of a component of a package of software programs that you licensed as a single product, the Product may be used and transferred only as part of that single product package and may not be separated for use by more than one end user.
+7. DOWNGRADES. Instead of installing and using the Product, you may install and use one copy of an earlier version of the Product, provided that you completely remove such earlier version and install the original Product within a reasonable time. Your use of such earlier version shall be governed by this EULA, and your rights to use such earlier version shall terminate when you install the original Product.
+8. LIMITATIONS ON REVERSE ENGINEERING, DECOMPILATION, AND DISASSEMBLY. You may not reverse engineer, decompile, or disassemble the Product, except and only to the extent that such activity is expressly permitted by applicable law notwithstanding this limitation.
+9. RENTAL. You may not rent, lease or lend the Product.
+10. TRADEMARKS. This EULA does not grant you any rights in connection with any trademarks or service marks of Microsoft.
+11. NOT FOR RESALE SOFTWARE. If the Product is labeled “Not For Resale” or “NFR,” then you may not resell, or otherwise transfer for value, the Product.
+12. ACADEMIC EDITION SOFTWARE. To use Product identified as “Academic Edition” or “AE,” you must be a “Qualified Educational User.” For qualification-related questions, please contact the Microsoft Sales Information Center/One Microsoft Way/Redmond, WA 98052-6399 or the Microsoft subsidiary serving your country.
+13. CONSENT TO USE OF DATA. You agree that Microsoft and its affiliates may collect and use technical information gathered as part of the product support services provided to you, if any, related to the Product. Microsoft may use this information solely to improve our products or to provide customized services or technologies to you and will not disclose this information in a form that personally identifies you.
+14. LINKS TO THIRD PARTY SITES. You may link to third party sites through the use of the Product. The third party sites are not under the control of Microsoft, and Microsoft is not responsible for the contents of any third party sites, any links contained in third party sites, or any changes or updates to third party sites. Microsoft is not responsible for webcasting or any other form of transmission received from any third party sites. Microsoft is providing these links to third party sites to you only as a convenience, and the inclusion of any link does not imply an endorsement by Microsoft of the third party site.
+15. U.S. GOVERNMENT LICENSE RIGHTS. All Product provided to the U.S. Government pursuant to solicitations issued on or after December 1, 1995 is provided with the commercial license rights and restrictions described elsewhere herein. All Product provided to the U.S. Government pursuant to solicitations issued prior to December 1, 1995 is provided with “Restricted Rights” as provided for in FAR, 48 CFR 52.227-14 (JUNE 1987) or DFAR, 48 CFR 252.227-7013 (OCT 1988), as applicable.
+16. EXPORT RESTRICTIONS. You acknowledge that the Product is subject to U.S. export jurisdiction. You agree to comply with all applicable international and national laws that apply to the Product, including the U.S. Export Administration Regulations, as well as end-user, end-use, and destination restrictions issued by U.S. and other governments. For additional information see <http://www.microsoft.com/exporting/>.
+17. ADDITIONAL SOFTWARE/SERVICES. This EULA applies to updates, supplements, add-on components, or Internet-based services components, of the Product that Microsoft may provide to you or make available to you after the date you obtain your initial copy of the Product, unless we provide other terms along with the update, supplement, add-on component, or Internet-based services component. Microsoft reserves the right to discontinue any Internet-based services provided to you or made available to you through the use of the Product.
+18. SOFTWARE TRANSFER. The initial user of the Product may make a one-time permanent transfer of this EULA and Product to another end user. This transfer must include all of the Product (including all component parts, the media and printed materials, any upgrades, this EULA, and, if applicable, the Certificate of Authenticity). The transfer may not be an indirect transfer, such as a consignment. Prior to the transfer, the end user receiving the Software must agree to all the EULA terms.
+19. TERMINATION. Without prejudice to any other rights, Microsoft may terminate this EULA if you fail to comply with the terms and conditions of this EULA. In such event, you must destroy all copies of the Product and all of its component parts.
+20. APPLICABLE LAW. If you acquired this Product in the United States, this EULA is governed by the laws of the State of Washington. If you acquired this Product in Canada, unless expressly prohibited by local law, this EULA is governed by the laws in force in the Province of Ontario, Canada; and, in respect of any dispute which may arise hereunder, you consent to the jurisdiction of the federal and provincial courts sitting in Toronto, Ontario. If this Product was acquired outside the United States, then local law may apply.
+21. The Product is protected by copyright and other intellectual property laws and treaties. Microsoft or its suppliers own the title, copyright, and other intellectual property rights in the Product. The Product is licensed, not sold.
+
+22. LIMITED WARRANTY FOR PRODUCT ACQUIRED IN THE US AND CANADA.
+Except with respect to the Redistributables, which are provided “AS IS,” without warranty of any kind, Microsoft warrants that the Product will perform substantially in accordance with the accompanying materials for a period of ninety days from the date of receipt.
+If an implied warranty or condition is created by your state/jurisdiction and federal or state/provincial law prohibits disclaimer of it, you also have an implied warranty or condition, BUT ONLY AS TO DEFECTS DISCOVERED DURING THE PERIOD OF THIS LIMITED WARRANTY (NINETY DAYS). AS TO ANY DEFECTS DISCOVERED AFTER THE NINETY (90) DAY PERIOD, THERE IS NO WARRANTY OR CONDITION OF ANY KIND. Some states/jurisdictions do not allow limitations on how long an implied warranty or condition lasts, so the above limitation may not apply to you.
+Any supplements or updates to the Product, including without limitation, any (if any) service packs or hot fixes provided to you after the expiration of the ninety day Limited Warranty period are not covered by any warranty or condition, express, implied or statutory.
+LIMITATION ON REMEDIES; NO CONSEQUENTIAL OR OTHER DAMAGES. Your exclusive remedy for any breach of this Limited Warranty is as set forth below. Except for any refund elected by Microsoft, YOU ARE NOT ENTITLED TO ANY DAMAGES, INCLUDING BUT NOT LIMITED TO CONSEQUENTIAL DAMAGES, if the Product does not meet Microsoft’s Limited Warranty, and, to the maximum extent allowed by applicable law, even if any remedy fails of its essential purpose. The terms of Section 25 below (“Exclusion of Incidental, Consequential and Certain Other Damages”) are also incorporated into this Limited Warranty. Some states/jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so the above limitation or exclusion may not apply to you. This Limited Warranty gives you specific legal rights. You may have others which vary from state/jurisdiction to state/jurisdiction. YOUR EXCLUSIVE REMEDY. Microsoft’s and its suppliers’ entire liability and your exclusive remedy shall be, at Microsoft’s option from time to time exercised subject to applicable law, (a) return of the price paid (if any) for the Product, or (b) repair or replacement of the Product, that does not meet this Limited Warranty and that is returned to Microsoft with a copy of your receipt. You will receive the remedy elected by Microsoft without charge, except that you are responsible for any expenses you may incur (e.g. cost of shipping the Product to Microsoft). This Limited Warranty is void if failure of the Product has resulted from accident, abuse, misapplication, abnormal use or a virus. Any replacement Product will be warranted for the remainder of the original warranty period or thirty (30) days, whichever is longer. Outside the United States or Canada, neither these remedies nor any product support services offered by Microsoft are available without proof of purchase from an authorized international source. To exercise your remedy, contact: Microsoft, Attn. 
Microsoft Sales Information Center/One Microsoft Way/Redmond, WA 98052-6399, or the Microsoft subsidiary serving your country.
+
+LIMITED WARRANTY FOR PRODUCT ACQUIRED OUTSIDE THE US OR CANADA.
+FOR THE LIMITED WARRANTIES AND SPECIAL PROVISIONS PERTAINING TO YOUR PARTICULAR JURISDICTION, PLEASE REFER TO YOUR WARRANTY BOOKLET INCLUDED WITH THIS PACKAGE OR PROVIDED WITH THE SOFTWARE PRODUCT PRINTED MATERIALS.
+
+23. DISCLAIMER OF WARRANTIES. The Limited Warranty that appears above is the only express warranty made to you and is provided in lieu of any other express warranties (if any) created by any documentation, packaging, or other communications. Except for the Limited Warranty and to the maximum extent permitted by applicable law, Microsoft and its suppliers provide the Product and support services (if any) AS IS AND WITH ALL FAULTS, and hereby disclaim all other warranties and conditions, either express, implied or statutory, including, but not limited to, any (if any) implied warranties, duties or conditions of merchantability, of fitness for a particular purpose, of reliability or availability, of accuracy or completeness of responses, of results, of workmanlike effort, of lack of viruses, and of lack of negligence, all with regard to the Product, and the provision of or failure to provide support or other services, information, software, and related content through the Product or otherwise arising out of the use of the Product. ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT WITH REGARD TO THE PRODUCT.
+24. EXCLUSION OF INCIDENTAL, CONSEQUENTIAL AND CERTAIN OTHER DAMAGES. TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL MICROSOFT OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, BUT NOT LIMITED TO, DAMAGES FOR LOSS OF PROFITS OR CONFIDENTIAL OR OTHER INFORMATION, FOR BUSINESS INTERRUPTION, FOR PERSONAL INJURY, FOR LOSS OF PRIVACY, FOR FAILURE TO MEET ANY DUTY INCLUDING OF GOOD FAITH OR OF REASONABLE CARE, FOR NEGLIGENCE, AND FOR ANY OTHER PECUNIARY OR OTHER LOSS WHATSOEVER) ARISING OUT OF OR IN ANY WAY RELATED TO THE USE OF OR INABILITY TO USE THE PRODUCT, THE PROVISION OF OR FAILURE TO PROVIDE SUPPORT OR OTHER SERVICES, INFORMATION, SOFTWARE, AND RELATED CONTENT THROUGH THE PRODUCT OR OTHERWISE ARISING OUT OF THE USE OF THE PRODUCT, OR OTHERWISE UNDER OR IN CONNECTION WITH ANY PROVISION OF THIS EULA, EVEN IN THE EVENT OF THE FAULT, TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY, BREACH OF CONTRACT OR BREACH OF WARRANTY OF MICROSOFT OR ANY SUPPLIER, AND EVEN IF MICROSOFT OR ANY SUPPLIER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+25. LIMITATION OF LIABILITY AND REMEDIES. Notwithstanding any damages that you might incur for any reason whatsoever (including, without limitation, all damages referenced above and all direct or general damages), the entire liability of Microsoft and any of its suppliers under any provision of this EULA and your exclusive remedy for all of the foregoing (except for any remedy of repair or replacement elected by Microsoft with respect to any breach of the Limited Warranty) shall be limited to the greater of the amount actually paid by you for the Product or U.S.$5.00. The foregoing limitations, exclusions and disclaimers (including Sections 22, 23, and 24 above) shall apply to the maximum extent permitted by applicable law, even if any remedy fails its essential purpose.
+26. ENTIRE AGREEMENT. This EULA (including any addendum or amendment to this EULA which is included with the Product) are the entire agreement between you and Microsoft relating to the Product and the support services (if any) and they supersede all prior or contemporaneous oral or written communications, proposals and representations with respect to the Product or any other subject matter covered by this EULA. To the extent the terms of any Microsoft policies or programs for support services conflict with the terms of this EULA, the terms of this EULA shall control.
+
+Si vous avez acquis votre produit Microsoft au CANADA, la garantie limitée suivante vous concerne :
+GARANTIE LIMITÉE
+Sauf pour celles du “Redistributables,” qui sont fournies “comme telles,” Microsoft garantit que le Produit fonctionnera conformément aux documents inclus pendant une période de 90 jours suivant la date de réception.
+Si une garantie ou condition implicite est créée par votre État ou votre territoire et qu’une loi fédérale ou provinciale ou État en interdit le déni, vous jouissez également d’une garantie ou condition implicite, MAIS UNIQUEMENT POUR LES DÉFAUTS DÉCOUVERTS DURANT LA PÉRIODE DE LA PRÉSENTE GARANTIE LIMITÉE (QUATRE-VINGT-DIX JOURS). IL N’Y A AUCUNE GARANTIE OU CONDITION DE QUELQUE NATURE QUE CE SOIT QUANT AUX DÉFAUTS DÉCOUVERTS APRÈS CETTE PÉRIODE DE QUATRE-VINGT-DIX JOURS. Certains États ou territoires ne permettent pas de limiter la durée d’une garantie ou condition implicite de sorte que la limitation ci-dessus peut ne pas s’appliquer à vous.
+Tous les suppléments ou toutes les mises à jour relatifs au Produit, notamment, les ensembles de services ou les réparations à chaud (le cas échéant) qui vous sont fournis après l’expiration de la période de quatre-vingt-dix jours de la garantie limitée ne sont pas couverts par quelque garantie ou condition que ce soit, expresse ou implicite.
+LIMITATION DES RECOURS; ABSENCE DE DOMMAGES INDIRECTS OU AUTRES. Votre recours exclusif pour toute violation de la présente garantie limitée est décrit ci­après. Sauf pour tout remboursement au choix de Microsoft, si le Produit ne respecte pas la garantie limitée de Microsoft et, dans la mesure maximale permise par les lois applicables, même si tout recours n’atteint pas son but essentiel, VOUS N’AVEZ DROIT À AUCUNS DOMMAGES, NOTAMMENT DES DOMMAGES INDIRECTS. Les modalités de la clause «Exclusion des dommages accessoires, indirects et de certains autres dommages » sont également intégrées à la présente garantie limitée. Certains États ou territoires ne permettent pas l’exclusion ou la limitation des dommages indirects ou accessoires de sorte que la limitation ou l’exclusion ci­dessus peut ne pas s’appliquer à vous. La présente garantie limitée vous donne des droits légaux spécifiques. Vous pouvez avoir d’autres droits qui peuvent varier d’un territoire ou d’un État à un autre. VOTRE RECOURS EXCLUSIF. L’obligation intégrale de Microsoft et de ses fournisseurs et votre recours exclusif seront, selon le choix de Microsoft de temps à autre sous réserve de toute loi applicable, a) le remboursement du prix payé, le cas échéant, pour le Produit ou b) la réparation ou le remplacement du Produit qui ne respecte pas la présente garantie limitée et qui est retourné à Microsoft avec une copie de votre reçu. Vous recevrez la compensation choisie par Microsoft, sans frais, sauf que vous êtes responsable des dépenses que vous pourriez engager (p. ex., les frais d’envoi du Produit à Microsoft). La présente garantie limitée est nulle si la défectuosité du Produit est causée par un accident, un usage abusif, une mauvaise application, un usage anormal ou un virus. Tout Produit de remplacement sera garanti pour le reste de la période de garantie initiale ou pendant trente (30) jours, selon la plus longue entre ces deux périodes. 
À l’extérieur des États-Unis ou du Canada, ces recours ou l’un quelconque des services de soutien technique offerts par Microsoft ne sont pas disponibles sans preuve d’achat d’une source internationale autorisée. Pour exercer votre recours, vous devez communiquer avec Microsoft et vous adresser au Microsoft Sales Information Center/One Microsoft Way/Redmond, WA 98052-6399, ou à la filiale de Microsoft de votre pays.
+
+DÉNI DE GARANTIES. La garantie limitée mentionnée ci-dessus constitue la seule garantie expresse qui vous est donnée et remplace toutes autres garanties expresses (s’il en est) mentionnées dans un document ou sur un emballage. Sauf en ce qui a trait à la garantie limitée et dans la mesure maximale permise par les lois applicables, le Produit et les services de soutien technique (le cas échéant) sont fournis TELS QUELS ET AVEC TOUS LES DÉFAUTS par Microsoft et ses fournisseurs, lesquels par les présentes dénient toutes autres garanties et conditions expresses, implicites ou en vertu de la loi, notamment (le cas échéant) les garanties, devoirs ou conditions implicites de qualité marchande, d’adaptation à un usage particulier, d’exactitude ou d’exhaustivité des réponses, des résultats, des efforts déployés selon les règles de l’art, d’absence de virus et de négligence, le tout à l’égard du Produit et de la prestation des services de soutien technique ou de l’omission d’une telle prestation. PAR AILLEURS, IL N’Y A AUCUNE GARANTIE OU CONDITION QUANT AU TITRE DE PROPRIÉTÉ, À LA JOUISSANCE OU LA POSSESSION PAISIBLE, À LA CONCORDANCE À UNE DESCRIPTION NI QUANT À UNE ABSENCE DE CONTREFAÇON CONCERNANT LE PRODUIT.
+EXCLUSION DES DOMMAGES ACCESSOIRES, INDIRECTS ET DE CERTAINS AUTRES DOMMAGES. DANS LA MESURE MAXIMALE PERMISE PAR LES LOIS APPLICABLES, EN AUCUN CAS MICROSOFT OU SES FOURNISSEURS NE SERONT RESPONSABLES DES DOMMAGES SPÉCIAUX, CONSÉCUTIFS, ACCESSOIRES OU INDIRECTS DE QUELQUE NATURE QUE CE SOIT (NOTAMMENT, LES DOMMAGES À L’ÉGARD DU MANQUE À GAGNER OU DE LA DIVULGATION DE RENSEIGNEMENTS CONFIDENTIELS OU AUTRES, DE LA PERTE D’EXPLOITATION, DE BLESSURES CORPORELLES, DE LA VIOLATION DE LA VIE PRIVÉE, DE L’OMISSION DE REMPLIR TOUT DEVOIR, Y COMPRIS D’AGIR DE BONNE FOI OU D’EXERCER UN SOIN RAISONNABLE, DE LA NÉGLIGENCE ET DE TOUTE AUTRE PERTE PÉCUNIAIRE OU AUTRE PERTE DE QUELQUE NATURE QUE CE SOIT) SE RAPPORTANT DE QUELQUE MANIÈRE QUE CE SOIT À L’UTILISATION DU PRODUIT OU À L’INCAPACITÉ DE S’EN SERVIR, À LA PRESTATION OU À L’OMISSION D’UNE TELLE PRESTATION DE SERVICES DE SOUTIEN TECHNIQUE OU AUTREMENT AUX TERMES DE TOUTE DISPOSITION DU PRÉSENT EULA OU RELATIVEMENT À UNE TELLE DISPOSITION, MÊME EN CAS DE FAUTE, DE DÉLIT CIVIL (Y COMPRIS LA NÉGLIGENCE), DE RESPONSABILITÉ STRICTE, DE VIOLATION DE CONTRAT OU DE VIOLATION DE GARANTIE DE MICROSOFT OU DE TOUT FOURNISSEUR ET MÊME SI MICROSOFT OU TOUT FOURNISSEUR A ÉTÉ AVISÉ DE LA POSSIBILITÉ DE TELS DOMMAGES.
+LIMITATION DE RESPONSABILITÉ ET RECOURS. Malgré les dommages que vous puissiez subir pour quelque motif que ce soit (notamment, tous les dommages susmentionnés et tous les dommages directs ou généraux), l’obligation intégrale de Microsoft et de l’un ou l’autre de ses fournisseurs aux termes de toute disposition du présent EULA et votre recours exclusif à l’égard de tout ce qui précède (sauf en ce qui concerne tout recours de réparation ou de remplacement choisi par Microsoft à l’égard de tout manquement à la garantie limitée) se limite au plus élevé entre les montants suivants : le montant que vous avez réellement payé pour le Produit ou 5,00 $US. Les limites, exclusions et dénis qui précèdent (y compris les clauses ci-dessus), s’appliquent dans la mesure maximale permise par les lois applicables, même si tout recours n’atteint pas son but essentiel.
+La présente Convention est régie par les lois de la province d’Ontario, Canada. Chacune des parties à la présente reconnaît irrévocablement la compétence des tribunaux de la province d’Ontario et consent à instituer tout litige qui pourrait découler de la présente auprès des tribunaux situés dans le district judiciaire de York, province d’Ontario.
+Au cas où vous auriez des questions concernant cette licence ou que vous désiriez vous mettre en rapport avec Microsoft pour quelque raison que ce soit, veuillez contacter la succursale Microsoft desservant votre pays, dont l’adresse est fournie dans ce produit, ou écrivez à : Microsoft Sales Information Center, One Microsoft Way, Redmond, Washington 98052-6399.
+
+
+
diff --git a/spamfilter/Utilities/UnicoWS/redist.txt b/spamfilter/Utilities/UnicoWS/redist.txt
new file mode 100644
index 0000000..e2f66a2
--- /dev/null
+++ b/spamfilter/Utilities/UnicoWS/redist.txt
@@ -0,0 +1,12 @@
+===============================================
+Microsoft Layer for Unicode on Windows 95/98/ME
+===============================================
+
+In addition to the rights granted in Section 1 of the Agreement ("Agreement"), with
+respect to UNICOWS.DLL, you have the following non-exclusive, royalty free
+rights subject to the Distribution Requirements detailed in Sections 1 and 3 of
+the Agreement:
+
+(1) You may distribute UNICOWS.DLL with the following: Windows 95, Windows 98,
+Windows 98 Second Edition, Windows Millennium, Windows NT4, Windows 2000, Windows XP,
+and Windows Server 2003.
diff --git a/spamfilter/Utilities/UnicoWS/unicows.dll b/spamfilter/Utilities/UnicoWS/unicows.dll
new file mode 100644
index 0000000..d992808
--- /dev/null
+++ b/spamfilter/Utilities/UnicoWS/unicows.dll
Binary files differ
diff --git a/spamfilter/Utilities/UnicoWS/unicows.lib b/spamfilter/Utilities/UnicoWS/unicows.lib
new file mode 100644
index 0000000..124ec84
--- /dev/null
+++ b/spamfilter/Utilities/UnicoWS/unicows.lib
Binary files differ
diff --git a/spamfilter/Utilities/UnicoWS/unicows.pdb b/spamfilter/Utilities/UnicoWS/unicows.pdb
new file mode 100644
index 0000000..4fb0f58
--- /dev/null
+++ b/spamfilter/Utilities/UnicoWS/unicows.pdb
Binary files differ