diff options
| author | Vadim Dashevskiy <watcherhd@gmail.com> | 2012-07-24 06:41:19 +0000 | 
|---|---|---|
| committer | Vadim Dashevskiy <watcherhd@gmail.com> | 2012-07-24 06:41:19 +0000 | 
| commit | a33833212f040272fc6c97047c8cb335b6f5405a (patch) | |
| tree | d778dfd7187571182d9a1333a72cc941b60f7e3d /plugins/SmileyAdd/src/regexp | |
| parent | ec5a78dd895f86e9077caf1aacd3e49b48dffbcf (diff) | |
SimpleAR, SimpleStatusMsg, SmileyAdd, SpellChecker: changed folder structure 
git-svn-id: http://svn.miranda-ng.org/main/trunk@1149 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
Diffstat (limited to 'plugins/SmileyAdd/src/regexp')
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/Matcher.cpp | 178 | ||||
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/Matcher.h | 248 | ||||
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/Pattern.cpp | 1709 | ||||
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/Pattern.h | 1663 | ||||
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/WCMatcher.cpp | 181 | ||||
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/WCMatcher.h | 234 | ||||
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/WCPattern.cpp | 1747 | ||||
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/WCPattern.h | 1663 | ||||
| -rw-r--r-- | plugins/SmileyAdd/src/regexp/test.cpp | 38 | 
9 files changed, 7661 insertions, 0 deletions
| diff --git a/plugins/SmileyAdd/src/regexp/Matcher.cpp b/plugins/SmileyAdd/src/regexp/Matcher.cpp new file mode 100644 index 0000000000..ebd1f57850 --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/Matcher.cpp @@ -0,0 +1,178 @@ +#include <Matcher.h>
 +#include <Pattern.h>
 +
 +const int Matcher::MATCH_ENTIRE_STRING = 0x01;
 +
 +/*
 +  Detailed documentation is provided in this class' header file
 +
 +  @author   Jeffery Stuart
 +  @since    November 2004
 +  @version  1.07.00
 +*/
 +
 +Matcher::Matcher(Pattern * pattern, const bkstring & text)
 +{
 +  pat = pattern;
 +  str = &text;
 +  gc = pattern->groupCount;
 +  ncgc = -pattern->nonCapGroupCount;
 +  flags = 0;
 +  matchedSomething = false;
 +  starts        = new int[gc + ncgc];
 +  ends          = new int[gc + ncgc];
 +  groups        = new int[gc + ncgc];
 +  groupPos      = new int[gc + ncgc];
 +  groupIndeces  = new int[gc + ncgc];
 +  starts        = starts        + ncgc;
 +  ends          = ends          + ncgc;
 +  groups        = groups        + ncgc;
 +  groupPos      = groupPos      + ncgc;
 +  groupIndeces  = groupIndeces  + ncgc;
 +  for (int i = 0; i < gc; ++i) starts[i] = ends[i] = 0;
 +}
 +Matcher::~Matcher()
 +{
 +  delete [] (starts       - ncgc);
 +  delete [] (ends         - ncgc);
 +  delete [] (groups       - ncgc);
 +  delete [] (groupIndeces - ncgc);
 +  delete [] (groupPos     - ncgc);
 +}
 +void Matcher::clearGroups()
 +{
 +  int i;
 +  lm = 0;
 +  for (i = 0; i < gc; ++i)    groups[i] = starts[i] = ends[i] = -1;
 +  for (i = 1; i <= ncgc; ++i) groups[0 - i] = -1;
 +}
 +bkstring Matcher::replaceWithGroups(const bkstring & str)
 +{
 +  bkstring ret = "";
 +
 +  bkstring t = str;
 +  while (t.size() > 0)
 +  {
 +    if (t[0] == '\\')
 +    {
 +      t.erase(0, 1);
 +      if (t.size() == 0)
 +      {
 +        ret += "\\";
 +      }
 +      else if (t[0] < '0' || t[0] > '9')
 +      {
 +        ret += t[0];
 +        t.erase(0, 1);
 +      }
 +      else
 +      {
 +        int gn = 0;
 +        while (t.size() > 0 && t[0] >= '0' && t[0] <= '9')
 +        {
 +          gn = gn * 10 + (t[0] - '0');
 +          t.erase(0, 1);
 +        }
 +        ret += getGroup(gn);
 +      }
 +    }
 +    else
 +    {
 +        ret += t[0];
 +        t.erase(0, 1);
 +    }
 +  }
 +
 +  return ret;
 +}
 +unsigned long Matcher::getFlags() const
 +{
 +  return flags;
 +}
 +const bkstring& Matcher::getText() const
 +{
 +  return *str;
 +}
 +
 +bool Matcher::matches()
 +{
 +  flags = MATCH_ENTIRE_STRING;
 +  matchedSomething = false;
 +  clearGroups();
 +  lm = 0;
 +  return pat->head->match(*str, this, 0) == (int)str->size();
 +}
 +bool Matcher::findFirstMatch()
 +{
 +  starts[0] = 0;
 +  flags = 0;
 +  clearGroups();
 +  start = 0;
 +  lm = 0;
 +  ends[0] = pat->head->match(*str, this, 0);
 +  if (ends[0] >= 0)
 +  {
 +    matchedSomething = true;
 +    return 1;
 +  }
 +  return 0;
 +}
 +bool Matcher::findNextMatch()
 +{
 +  int s = starts[0], e = ends[0];
 +
 +  if (!matchedSomething) return findFirstMatch();
 +  if (s == e) ++e;
 +  flags = 0;
 +  clearGroups();
 +
 +  starts[0] = e;
 +  if (e >= (int)str->size()) return 0;
 +  start = e;
 +  lm = e;
 +  ends[0] = pat->head->match(*str, this, e);
 +  return ends[0] >= 0;
 +}
 +std::vector<bkstring> Matcher::findAll()
 +{
 +  std::vector<bkstring> ret;
 +  reset();
 +  while (findNextMatch())
 +  {
 +    ret.push_back(getGroup());
 +  }
 +  return ret;
 +}
 +
 +void Matcher::reset()
 +{
 +  lm = 0;
 +  clearGroups();
 +  matchedSomething = false;
 +}
 +
 +int Matcher::getStartingIndex(const int groupNum) const
 +{
 +  if (groupNum < 0 || groupNum >= gc) return -1;
 +  return starts[groupNum];
 +}
 +int Matcher::getEndingIndex(const int groupNum) const
 +{
 +  if (groupNum < 0 || groupNum >= gc) return -1;
 +  return ends[groupNum];
 +}
 +bkstring Matcher::getGroup(const int groupNum) const
 +{
 +  if (groupNum < 0 || groupNum >= gc) return "";
 +  if (starts[groupNum] < 0 || ends[groupNum] < 0) return "";
 +  return str->substr(starts[groupNum], ends[groupNum] - starts[groupNum]);
 +}
 +std::vector<bkstring> Matcher::getGroups(const bool includeGroupZero) const
 +{
 +  int i, start = (includeGroupZero ? 0 : 1);
 +  std::vector<bkstring> ret;
 +
 +  for (i = start; i < gc; ++i) ret.push_back(getGroup(i));
 +  return ret;
 +}
 +
 diff --git a/plugins/SmileyAdd/src/regexp/Matcher.h b/plugins/SmileyAdd/src/regexp/Matcher.h new file mode 100644 index 0000000000..621184d8c2 --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/Matcher.h @@ -0,0 +1,248 @@ +#ifndef __MATCHER_H__
 +#define __MATCHER_H__
 +
 +#include "bkstring.h"
 +#include <vector>
 +
 +class Vector;
 +class NFANode;
 +class NFAStartNode;
 +class NFAEndNode;
 +class NFAGroupHeadNode;
 +class NFAGroupLoopNode;
 +class NFAGroupLoopPrologueNode;
 +class NFAGroupTailNode;
 +class NFALookBehindNode;
 +class NFAStartOfLineNode;
 +class NFAEndOfLineNode;
 +class NFAEndOfMatchNode;
 +class NFAReferenceNode;
 +class Pattern;
 +
 +/**
 +  A matcher is a non thread-safe object used to scan strings using a given
 +  {@link Pattern Pattern} object. Using a <code>Matcher</code> is the preferred
 +  method for scanning strings. Matchers are not thread-safe. Matchers require
 +  very little dynamic memory, hence one is encouraged to create several
 +  instances of a matcher when necessary as opposed to sharing a single instance
 +  of a matcher.
 +  <p>
 +  The most common methods needed by the matcher are <code>matches</code>,
 +  <code>findNextMatch</code>, and <code>getGroup</code>. <code>matches</code>
 +  and <code>findNextMatch</code> both return success or failure, and further
 +  details can be gathered from their documentation.
 +  <p>
 +  Unlike Java's <code>Matcher</code>, this class allows you to change the string
 +  you are matching against. This provides a small optimization, since you no
 +  longer need multiple matchers for a single pattern in a single thread.
 +  <p>
 +  This class also provides an extremely handy method for replacing text with
 +  captured data via the <code>replaceWithGroups</code> method. A typical
 +  invocation looks like:
 +  <pre>
 +  char buf[10000];
 +  bkstring str = "\\5 (user name \\1) uses \\7 for his/her shell and \\6 is their home directory";
 +  FILE * fp = fopen("/etc/passwd", "r");
 +  Pattern::registerPattern("entry", "[^:]+");
 +  Pattern * p = Pattern::compile("^({entry}):({entry}):({entry}):({entry}):({entry}):({entry}):({entry})$",
 +                                 Pattern::MULTILINE_MATCHING | Pattern::UNIX_LINE_MODE);
 +  Matcher * m = p->createMatcher("");
 +  while (fgets(buf, 9999, fp))
 +  {
 +    m->setString(buf);
 +    if (m->matches())
 +    {
 +      printf("%s\n", m->replaceWithGroups(str).c_str());
 +    }
 +  }
 +  fclose(fp);
 +
 +  </pre>
 +  Calling any of the following functions before first calling
 +  <code>matches</code>, <code>findFirstMatch</code>, or
 +  <code>findNextMatch</code> results in undefined behavior and may cause your
 +  program to crash.
 +  <code>
 +  <ul>
 +    <li>replaceWithGroups</code>
 +    <li>getStartingIndex</li>
 +    <li>getEndingIndex</li>
 +    <li>getGroup</li>
 +    <li>getGroups</li>
 +  </ul>
 +  </code>
 +  <p>
 +  The function <code>findFirstMatch</code> will attempt to find the first match
 +  in the input string. The same results can be obtained by first calling
 +  <code>reset</code> followed by <code>findNextMatch</code>.
 +  <p>
 +  To eliminate the necessity of looping through a string to find all the
 +  matching substrings, <code>findAll</code> was created. The function will find
 +  all matching substrings and return them in a <code>vector</code>. If you need
 +  to examine specific capture groups within the substrings, then this method
 +  should not be used.
 +
 +  @author    Jeffery Stuart
 +  @since     March 2003, Stable Since November 2004
 +  @version   1.07.00
 +  @memo Mutable object used on instances of a Pattern class
 + */
 +class Matcher
 +{
 +  friend class NFANode;
 +  friend class NFAStartNode;
 +  friend class NFAEndNode;
 +  friend class NFAGroupHeadNode;
 +  friend class NFAGroupLoopNode;
 +  friend class NFAGroupLoopPrologueNode;
 +  friend class NFAGroupTailNode;
 +  friend class NFALookBehindNode;
 +  friend class NFAStartOfLineNode;
 +  friend class NFAEndOfLineNode;
 +  friend class NFAEndOfMatchNode;
 +  friend class NFAReferenceNode;
 +  friend class Pattern;
 +  private:
 +    /**
 +      Creates a new matcher object against <code>text</code> using
 +      <code>pattern</code>.
 +
 +      @param pattern  The pattern with which to search
 +      @param text     The text in which to search
 +     */
 +    Matcher(Pattern * pattern, const bkstring & text);
 +  protected:
 +    /// The pattern we use to match
 +    Pattern * pat;
 +    /// The string in which we are matching
 +    const bkstring* str;
 +    /// The starting point of our match
 +    int start;
 +    /// An array of the starting positions for each group
 +    int * starts;
 +    /// An array of the ending positions for each group
 +    int * ends;
 +    /// An array of private data used by NFANodes during matching
 +    int * groups;
 +    /// An array of private data used by NFANodes during matching
 +    int * groupIndeces;
 +    /// An array of private data used by NFANodes during matching
 +    int * groupPos;
 +    /// The ending index of the last match
 +    int lm;
 +    /// The number of capturing groups we have
 +    int gc;
 +    /// The number of non-capturing groups we havew
 +    int ncgc;
 +    /// Whether or not we have matched something (used only by findFirstMatch and findNextMatch)
 +    int matchedSomething;
 +    /// The flags with which we were made
 +    unsigned long flags;
 +    /// Called by reset to clear the group arrays
 +    void clearGroups();
 +  public:
 +    /// Used internally by match to signify we want the entire string matched
 +    const static int MATCH_ENTIRE_STRING;
 +  public:
 +    /// Cleans up the dynamic memory used by this matcher
 +    ~Matcher();
 +    /**
 +      Replaces the contents of <code>str</code> with the appropriate captured
 +      text. <code>str</code> should have at least one back reference, otherwise
 +      this function does nothing.
 +      @param str The string in which to replace text
 +      @return A string with all backreferences appropriately replaced
 +     */
 +    bkstring replaceWithGroups(const bkstring & str);
 +    /**
 +      The flags currently being used by the matcher.
 +      @return Zero
 +     */
 +    unsigned long getFlags() const;
 +    /**
 +      The text being searched by the matcher.
 +      @return the text being searched by the matcher.
 +     */
 +    const bkstring& getText() const;
 +
 +    /**
 +      Scans the string from start to finish for a match. The entire string must
 +      match for this function to return success. Group variables are
 +      appropriately set and can be queried after this function returns.
 +
 +      @return Success if and only if the entire string matches the pattern
 +     */
 +    bool matches();
 +    /**
 +      Scans the string for the first substring matching the pattern. The entire
 +      string does not necessarily have to match for this function to return
 +      success. Group variables are appropriately set and can be queried after
 +      this function returns.
 +
 +      @return Success if any substring matches the specified pattern
 +     */
 +    bool findFirstMatch();
 +    /**
 +      Scans the string for the next substring matching the pattern. If no calls
 +      have been made to findFirstMatch of findNextMatch since the last call to
 +      reset, matches, or setString, then this function's behavior results to
 +      that of findFirstMatch.
 +
 +      @return Success if another substring can be found that matches the pattern
 +     */
 +    bool findNextMatch();
 +    /**
 +      Returns a vector of every substring in order which matches the given
 +      pattern.
 +
 +      @return Every substring in order which matches the given pattern
 +     */
 +    std::vector<bkstring> findAll();
 +    /**
 +      Resets the internal state of the matcher
 +     */
 +    void reset();
 +    /**
 +      Same as getText. Left n for backwards compatibilty with old source code
 +      @return Returns the string that is currently being used for matching
 +     */
 +    inline const bkstring&  getString()                           const { return *str; }
 +    /**
 +      Sets the string to scan
 +      @param newStr The string to scan for subsequent matches
 +     */
 +    inline void         setString(const bkstring & newStr)       { str = &newStr; reset(); }
 +
 +    /**
 +      Returns the starting index of the specified group.
 +      @param groupNum The group to query
 +      @return The starting index of the group if it was matched, -1 for an
 +              invalid group or if the group was not matched
 +     */
 +    int getStartingIndex(const int groupNum = 0) const;
 +    /**
 +      Returns the ending index of the specified group.
 +      @param groupNum The group to query
 +      @return The ending index of the group if it was matched, -1 for an
 +              invalid group or if the group was not matched
 +     */
 +    int getEndingIndex(const int groupNum = 0) const;
 +    /**
 +      Returns the specified group. An empty string ("") does not necessarily
 +      mean the group was not matched. A group such as (a*b?) could be matched by
 +      a zero length. If an empty string is returned, getStartingIndex can be
 +      called to determine if the group was actually matched.
 +      @param groupNum The group to query
 +      @return The text of the group
 +     */
 +    bkstring getGroup(const int groupNum = 0) const;
 +    /**
 +      Returns every capture group in a vector
 +
 +      @param includeGroupZero Whether or not include capture group zero
 +      @return Every capture group
 +     */
 +    std::vector<bkstring> getGroups(const bool includeGroupZero = 0) const;
 +};
 +
 +#endif
 diff --git a/plugins/SmileyAdd/src/regexp/Pattern.cpp b/plugins/SmileyAdd/src/regexp/Pattern.cpp new file mode 100644 index 0000000000..4487dddd4b --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/Pattern.cpp @@ -0,0 +1,1709 @@ +/**
 +  From the author (Jeff Stuart)
 +  "
 +  Let me start by saying this file is pretty big. If you feel up to it, you can
 +  try making changes yourself, but you would be better off to just email me at
 +  stuart@cs.ucdavis.edu if you think there is a bug, or have something useful you
 +  would like added. This project is very "near and dear" to me, so I am fairly
 +  quick to make bug fixes. The header files for Pattern and Matcher are fairly
 +  well documented and the function names are pretty self-explanatory, but if you
 +  are having any trouble, feel free to email me at stuart@cs.ucdavis.edu.
 +
 +  If you email me, make sure you put something like C++RE in the subject because
 +  I tend to delete email if I don't recognize the name and the subject is
 +  something like "I Need Your Help" or "Got A Second" or "I Found It".
 +  "
 + */
 +
 +/*
 +  Detailed documentation is provided in this class' header file
 +
 +  @author   Jeffery Stuart
 +  @since    November 2004
 +  @version  1.07.00
 +*/
 +
 +#define to_lower(a) (char)(unsigned)CharLowerA((LPSTR)static_cast<unsigned char>(a))
 +#define is_alpha IsCharAlphaA
 +
 +#ifdef _WIN32
 +  #pragma warning(push)
 +  #pragma warning(disable:4996)
 +  #define str_icmp lstrcmpiA
 +#else
 +  #define str_icmp strcasecmp
 +#endif
 +
 +#include <Pattern.h>
 +#include <Matcher.h>
 +#include <cstring>
 +#include <algorithm>
 +
 +std::map<bkstring, Pattern *> Pattern::compiledPatterns;
 +std::map<bkstring, std::pair<bkstring, unsigned long> > Pattern::registeredPatterns;
 +
 +const int Pattern::MIN_QMATCH = 0x00000000;
 +const int Pattern::MAX_QMATCH = 0x7FFFFFFF;
 +
 +const unsigned long Pattern::CASE_INSENSITIVE       = 0x01;
 +const unsigned long Pattern::LITERAL                = 0x02;
 +const unsigned long Pattern::DOT_MATCHES_ALL        = 0x04;
 +const unsigned long Pattern::MULTILINE_MATCHING     = 0x08;
 +const unsigned long Pattern::UNIX_LINE_MODE         = 0x10;
 +
 +Pattern::Pattern(const bkstring & rhs)
 +{
 +  matcher = NULL;
 +  pattern = rhs;
 +  curInd = 0;
 +  groupCount = 0;
 +  nonCapGroupCount = 0;
 +  error = 0;
 +  head = NULL;
 +}
 +// convenience function in case we want to add any extra debugging output
 +void Pattern::raiseError()
 +{
 +/*  switch (pattern[curInd - 1])
 +  {
 +  case '*':
 +  case ')':
 +  case '+':
 +  case '?':
 +  case ']':
 +  case '}':
 +    fprintf(stderr, "%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
 +    fprintf(stderr, "Syntax Error near here. Possible unescaped meta character.\n");
 +    break;
 +  default:
 +    fprintf(stderr, "%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
 +    fprintf(stderr, "Syntax Error near here. \n");
 +    break;
 +  }*/
 +  error = 1;
 +}
 +NFANode * Pattern::registerNode(NFANode * node)
 +{
 +  nodes[node] = 1;
 +  return node;
 +}
 +
 +bkstring Pattern::classUnion      (bkstring s1, bkstring s2)  const
 +{
 +  char out[300];
 +  std::sort(s1.begin(), s1.end());
 +  std::sort(s2.begin(), s2.end());
 +  *std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0;
 +  return out;
 +}
 +bkstring Pattern::classIntersect  (bkstring s1, bkstring s2)  const
 +{
 +  char out[300];
 +  std::sort(s1.begin(), s1.end());
 +  std::sort(s2.begin(), s2.end());
 +  *std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0;
 +  return out;
 +}
 +bkstring Pattern::classNegate     (bkstring s1)                  const
 +{
 +  char out[300];
 +  int i, ind = 0;
 +  std::map<char, bool> m;
 +
 +  for (i = 0; i < (int)s1.size(); ++i) m[s1[i]] = 1;
 +  for (i = 0xFF; i >= 0; --i) if (m.find((char)i) == m.end()) out[ind++] = (char)i;
 +  out[ind] = 0;
 +  return bkstring(out, ind);
 +}
 +bkstring Pattern::classCreateRange(char low, char hi)    const
 +{
 +  char out[300];
 +  int ind = 0;
 +  while (low != hi) out[ind++] = low++;
 +  out[ind++] = low;
 +  return bkstring(out, ind);
 +}
 +
 +int Pattern::getInt(int start, int end)
 +{
 +  int ret = 0;
 +  for (; start <= end; ++start) ret = ret * 10 + (pattern[start] - '0');
 +  return ret;
 +}
 +bool Pattern::quantifyCurly(int & sNum, int & eNum)
 +{
 +  bool good = 1;
 +  int i, ci = curInd + 1;
 +  int commaInd = ci, endInd = ci, len = (int)pattern.size();
 +  sNum = eNum = 0;
 +
 +  while (endInd   < len     && pattern[endInd  ] != '}') ++endInd;
 +  while (commaInd < endInd  && pattern[commaInd] != ',') ++commaInd;
 +  if (endInd >= len) { raiseError(); return 0; }
 +  for (i = ci; good && i < endInd; ++i) if (i != commaInd && !isdigit(pattern[i])) good = 0;
 +  if (!good && commaInd < endInd) { raiseError(); return 0; }
 +  if (!good) return 0;
 +  /* so now everything in here is either a comma (and there is at most one comma) or a digit */
 +  if (commaInd == ci) // {,*}
 +  {
 +    if (endInd == commaInd + 1)    { sNum = MIN_QMATCH;               eNum = MAX_QMATCH;                        } // {,} = *
 +    else                           { sNum = MIN_QMATCH;               eNum = getInt(commaInd + 1, endInd - 1);  } // {,+}
 +  }
 +  else if (commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH;                        } // {+,}
 +  else if (commaInd == endInd)     { sNum = getInt(ci, endInd - 1);   eNum = sNum;                              } // {+}
 +  else                             { sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1);  } // {+,+}
 +  curInd = endInd + 1;
 +  return 1;
 +}
 +NFANode * Pattern::quantifyGroup(NFANode * start, NFANode * stop, const int gn)
 +{
 +  NFANode * newNode = NULL;
 +  int type = 0;
 +
 +  if (curInd < (int)pattern.size())
 +  {
 +    char ch = (curInd + 1 >= (int)pattern.size()) ? -1 : pattern[curInd + 1];
 +    switch (pattern[curInd])
 +    {
 +    case '*':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case '?': ++curInd; type = 1; break;
 +      case '+': ++curInd; type = 2; break;
 +      }
 +      newNode = registerNode(new NFAGroupLoopPrologueNode(gn));
 +      newNode->next = registerNode(new NFAGroupLoopNode(start, MIN_QMATCH, MAX_QMATCH, gn, type));
 +      stop->next = newNode->next;
 +      return newNode;
 +    case '?':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case '?': ++curInd; type = 1; break;
 +      case '+': ++curInd; type = 2; break;
 +      }
 +      newNode = registerNode(new NFAGroupLoopPrologueNode(gn));
 +      newNode->next = registerNode(new NFAGroupLoopNode(start, MIN_QMATCH, 1, gn, type));
 +      stop->next = newNode->next;
 +      return newNode;
 +    case '+':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case '?': ++curInd; type = 1; break;
 +      case '+': ++curInd; type = 2; break;
 +      }
 +      newNode = registerNode(new NFAGroupLoopPrologueNode(gn));
 +      newNode->next = registerNode(new NFAGroupLoopNode(start, 1, MAX_QMATCH, gn, type));
 +      stop->next = newNode->next;
 +      return newNode;
 +    case '{':
 +      {
 +        int s, e;
 +        if (quantifyCurly(s, e))
 +        {
 +          ch = (curInd < (int)pattern.size()) ? pattern[curInd] : -1;
 +          switch (ch)
 +          {
 +          case '?': ++curInd; type = 1; break;
 +          case '+': ++curInd; type = 2; break;
 +          }
 +          newNode = registerNode(new NFAGroupLoopPrologueNode(gn));
 +          newNode->next = registerNode(new NFAGroupLoopNode(start, s, e, gn, type));
 +          stop->next = newNode->next;
 +          return newNode;
 +        }
 +      }
 +    default:
 +      break;
 +    }
 +  }
 +  return NULL;
 +}
 +
 +NFANode * Pattern::quantify(NFANode * newNode)
 +{
 +  if (curInd < (int)pattern.size())
 +  {
 +    char ch = (curInd + 1 >= (int)pattern.size()) ? -1 : pattern[curInd + 1];
 +    switch (pattern[curInd])
 +    {
 +    case '*':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode      (this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
 +      case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
 +      default:            newNode = registerNode(new NFAGreedyQuantifierNode    (this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
 +      }
 +      break;
 +    case '?':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode      (this, newNode, MIN_QMATCH, 1)); break;
 +      case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, MIN_QMATCH, 1)); break;
 +      default:            newNode = registerNode(new NFAGreedyQuantifierNode    (this, newNode, MIN_QMATCH, 1)); break;
 +      }
 +      break;
 +    case '+':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode      (this, newNode, 1, MAX_QMATCH)); break;
 +      case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, 1, MAX_QMATCH)); break;
 +      default:            newNode = registerNode(new NFAGreedyQuantifierNode    (this, newNode, 1, MAX_QMATCH)); break;
 +      }
 +      break;
 +    case '{':
 +      {
 +        int s, e;
 +        if (quantifyCurly(s, e))
 +        {
 +          ch = (curInd < (int)pattern.size()) ? pattern[curInd] : -1;
 +          switch (ch)
 +          {
 +          case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode      (this, newNode, s, e)); break;
 +          case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, s, e)); break;
 +          default:            newNode = registerNode(new NFAGreedyQuantifierNode    (this, newNode, s, e)); break;
 +          }
 +        }
 +      }
 +      break;
 +    default:
 +      break;
 +    }
 +  }
 +  return newNode;
 +}
 +bkstring Pattern::parseClass()
 +{
 +  bkstring t, ret = "";
 +  char ch, c1, c2;
 +  bool inv = 0, neg = 0, quo = 0;
 +
 +  if (curInd < (int)pattern.size() && pattern[curInd] == '^')
 +  {
 +    ++curInd;
 +    neg = 1;
 +  }
 +  while (curInd < (int)pattern.size() && pattern[curInd] != ']')
 +  {
 +    ch = pattern[curInd++];
 +    if (ch == '[')
 +    {
 +      t = parseClass();
 +      ret = classUnion(ret, t);
 +    }
 +    /*else if (ch == '-')
 +    {
 +      raiseError();
 +      curInd = pattern.size();
 +    }*/
 +    else if (ch == '&' && curInd < (int)pattern.size() && pattern[curInd] == '&')
 +    {
 +      if (pattern[++curInd] != '[')
 +      {
 +        raiseError();
 +        curInd = (int)pattern.size();
 +      }
 +      else
 +      {
 +        ++curInd;
 +        t = parseClass();
 +        ret = classIntersect(ret, t);
 +      }
 +    }
 +    else if (ch == '\\')
 +    {
 +      t = parseEscape(inv, quo);
 +      if (quo)
 +      {
 +        raiseError();
 +        curInd = (int)pattern.size();
 +      }
 +      else if (inv || t.size() > 1) // cant be part of a range (a-z)
 +      {
 +        if (inv) t = classNegate(t);
 +        ret = classUnion(ret, t);
 +      }
 +      else if (curInd < (int)pattern.size() && pattern[curInd] == '-') // part of a range (a-z)
 +      {
 +        c1 = t[0];
 +        ++curInd;
 +        if (curInd >= (int)pattern.size()) raiseError();
 +        else
 +        {
 +          c2 = pattern[curInd++];
 +          if (c2 == '\\')
 +          {
 +            t = parseEscape(inv, quo);
 +            if (quo)
 +            {
 +              raiseError();
 +              curInd = (int)pattern.size();
 +            }
 +            else if (inv || t.size() > 1) raiseError();
 +            else ret = classUnion(ret, classCreateRange(c1, c2));
 +          }
 +          else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&')
 +          {
 +            raiseError();
 +            curInd = (int)pattern.size();
 +          }
 +          else ret = classUnion(ret, classCreateRange(c1, c2));
 +        }
 +      }
 +      else
 +      {
 +        ret = classUnion(ret, t);
 +      }
 +    }
 +    else if (curInd < (int)pattern.size() && pattern[curInd] == '-')
 +    {
 +      c1 = ch;
 +      ++curInd;
 +      if (curInd >= (int)pattern.size()) raiseError();
 +      else
 +      {
 +        c2 = pattern[curInd++];
 +        if (c2 == '\\')
 +        {
 +          t = parseEscape(inv, quo);
 +          if (quo)
 +          {
 +            raiseError();
 +            curInd = (int)pattern.size();
 +          }
 +          else if (inv || t.size() > 1) raiseError();
 +          else ret = classUnion(ret, classCreateRange(c1, c2));
 +        }
 +        else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&')
 +        {
 +          raiseError();
 +          curInd = (int)pattern.size();
 +        }
 +        else
 +        {
 +          ret = classUnion(ret, classCreateRange(c1, c2));
 +        }
 +      }
 +    }
 +    else
 +    {
 +      ret += " ";
 +      ret[ret.size() - 1] = ch;
 +    }
 +  }
 +  if (curInd >= (int)pattern.size() || pattern[curInd] != ']')
 +  {
 +    raiseError();
 +    ret = "";
 +  }
 +  else
 +  {
 +    ++curInd;
 +    if (neg) ret = classNegate(ret);
 +  }
 +  return ret;
 +}
 +bkstring Pattern::parsePosix()
 +{
 +  bkstring s7 = pattern.substr(curInd, 7);
 +  if (s7 == "{Lower}")  { curInd += 7; return "abcdefghijklmnopqrstuvwxyz";                                                                       }
 +  if (s7 == "{Upper}")  { curInd += 7; return "ABCDEFGHIJKLMNOPQRSTUVWXYZ";                                                                       }
 +  if (s7 == "{Alpha}")  { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";                                             }
 +  if (s7 == "{Digit}")  { curInd += 7; return "0123456789";                                                                                       }
 +  if (s7 == "{Alnum}")  { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";                                   }
 +  if (s7 == "{Punct}")  { curInd += 7; return "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";                                                               }
 +  if (s7 == "{Graph}")  { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
 +  if (s7 == "{Print}")  { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
 +  if (s7 == "{Blank}")  { curInd += 7; return " \t";                                                                                              }
 +  if (s7 == "{Space}")  { curInd += 7; return " \t\n\x0B\f\r";                                                                                    }
 +  if (s7 == "{Cntrl}")
 +  {
 +	bkstring::value_type i;
 +    bkstring s = " ";
 +
 +    for (i = 0; i < 5; ++i) s += s;
 +    s += " ";
 +    for (i = 0; i <= 0x1F; ++i) s[i] = i;
 +    s[0x20] = 0x7F;
 +    curInd += 7;
 +    return s;
 +  }
 +  if (s7 == "{ASCII}")
 +  {
 +    bkstring s(0x80, ' ');
 +    for (int i = 0; i <= 0x7f; ++i) s[i] = (bkstring::value_type)i;
 +    curInd += 7;
 +    return s;
 +  }
 +  if (pattern.substr(curInd, 8) == "{XDigit}") { curInd += 8; return "abcdefABCDEF0123456789"; }
 +  raiseError();
 +  return "";
 +}
 +NFANode * Pattern::parseBackref()
 +{
 +  #define is_dig(x) ((x) >= '0' && (x) <= '9')
 +  #define to_int(x) ((x) - '0')
 +  int ci = curInd;
 +  int oldRef = 0, ref = 0;
 +
 +  while (ci < (int)pattern.size() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount))
 +  {
 +    oldRef = ref;
 +    ref = ref * 10 + to_int(pattern[ci++]);
 +  }
 +  if (ci == (int)pattern.size())
 +  {
 +    oldRef = ref;
 +    ++ci;
 +  }
 +  if (oldRef < 0 || ci <= curInd)
 +  {
 +    raiseError();
 +    return registerNode(new NFAReferenceNode(-1));
 +  }
 +  curInd = ci;
 +  return registerNode(new NFAReferenceNode(ref));
 +
 +  #undef is_dig
 +  #undef to_int
 +}
 +bkstring Pattern::parseOctal()
 +{
 +  #define islowoc(x)  ((x) >= '0' && (x) <= '3')
 +  #define isoc(x)     ((x) >= '0' && (x) <= '7')
 +  #define fromoc(x)   ((x) - '0')
 +  int ci = curInd;
 +  char ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : -1;
 +  char ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : -1;
 +  char ch3 = (ci + 2 < (int)pattern.size()) ? pattern[ci + 2] : -1;
 +  bkstring s = " ";
 +
 +  if (islowoc(ch1) && isoc(ch2))
 +  {
 +    curInd += 2;
 +    s[0] = fromoc(ch1) * 8 + fromoc(ch2);
 +    if (isoc(ch3))
 +    {
 +      ++curInd;
 +      s[0] = s[0] * 8 + fromoc(ch3);
 +    }
 +  }
 +  else if (isoc(ch1) && isoc(ch2))
 +  {
 +    curInd += 2;
 +    s[0] = fromoc(ch1) * 8 + fromoc(ch2);
 +  }
 +  else raiseError();
 +
 +  return s;
 +  #undef islowoc
 +  #undef isoc
 +  #undef fromoc
 +}
 +bkstring Pattern::parseHex()
 +{
 +  #define to_low(x)   (((x) >= 'A' && (x) <= 'Z') ? ((x) - 'A' + 'a') : (x))
 +  #define is_dig(x)   ((x) >= '0' && (x) <= '9')
 +  #define is_hex(x)   (is_dig(x) || (to_low(x) >= 'a' && to_low(x) <= 'f'))
 +  #define to_int(x)   ((is_dig(x)) ? ((x) - '0') : (to_low(x) - 'a' + 10))
 +
 +  int ci = curInd;
 +  char ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : -1;
 +  char ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : -1;
 +  bkstring s = " ";
 +
 +  if (is_hex(ch1) && is_hex(ch2))
 +  {
 +    curInd += 2;
 +    s[0] = (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F);
 +  }
 +
 +  return s;
 +  #undef to_low
 +  #undef is_dig
 +  #undef is_hex
 +  #undef to_int
 +}
 +bkstring Pattern::parseEscape(bool & inv, bool & quo)
 +{
 +  char ch = pattern[curInd++];
 +  bkstring classes = "";
 +
 +  if (curInd > (int)pattern.size())
 +  {
 +    raiseError();
 +    return NULL;
 +  }
 +
 +  quo = 0;
 +  inv = 0;
 +  switch (ch)
 +  {
 +  case 'p': classes = parsePosix();                                                         break;
 +  case 'P': classes = "!!"; classes += parsePosix();                                        break;
 +  case 'd': classes = "0123456789";                                                         break;
 +  case 'D': classes = "!!0123456789";                                                       break;
 +  case 's': classes = " \t\r\n\f";                                                          break;
 +  case 'S': classes = "!! \t\r\n\f";                                                        break;
 +  case 'w': classes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";    break;
 +  case 'W': classes = "!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";  break;
 +  case '0': classes = parseOctal(); break;
 +  case 'x': classes = parseHex();   break;
 +
 +  case 'Q': quo = 1;        break;
 +  case 't': classes = "\t"; break;
 +  case 'r': classes = "\r"; break;
 +  case 'n': classes = "\n"; break;
 +  case 'f': classes = "\f"; break;
 +  case 'a': classes = "\a"; break;
 +  case 'e': classes = "\r"; break;
 +  default:  classes = " "; classes[0] = ch; break;
 +  }
 +  if (classes.substr(0, 2) == "!!")
 +  {
 +    classes = classes.substr(2);
 +    inv = 1;
 +  }
 +  return classes;
 +}
 +NFANode * Pattern::parseRegisteredPattern(NFANode ** end)
 +{
 +  int i, j;
 +  bkstring s;
 +  NFANode * ret = NULL;
 +  for (i = curInd; i < (int)pattern.size() && pattern[i] != '}'; ++i) { }
 +  if (pattern[i] != '}') { raiseError(); return NULL; }
 +  if (i == curInd + 1)   { raiseError(); return NULL; } // {}
 +  if (
 +      !(
 +        (pattern[curInd] >= 'a' && pattern[curInd] <= 'z') ||
 +        (pattern[curInd] >= 'A' && pattern[curInd] <= 'Z') ||
 +        (pattern[curInd] == '_')
 +       )
 +     )
 +  {
 +    raiseError();
 +    return NULL;
 +  }
 +  for (j = curInd; !error && j < i; ++j)
 +  {
 +    if (
 +        !(
 +          (pattern[j] >= 'a' && pattern[j] <= 'z') ||
 +          (pattern[j] >= 'A' && pattern[j] <= 'Z') ||
 +          (pattern[j] >= '0' && pattern[j] <= '9') ||
 +          (pattern[j] == '_')
 +         )
 +        )
 +    {
 +      raiseError();
 +      return NULL;
 +    }
 +  }
 +  s = pattern.substr(curInd, i - curInd);
 +  if (registeredPatterns.find(s) == registeredPatterns.end()) raiseError();
 +  else
 +  {
 +    unsigned long oflags = flags;
 +    bkstring op = pattern;
 +    int ci = i + 1;
 +
 +    pattern = registeredPatterns[s].first;
 +    curInd = 0;
 +    flags = registeredPatterns[s].second;
 +
 +    --groupCount;
 +    ret = parse(0, 0, end);
 +
 +    pattern = op;
 +    curInd = ci;
 +    flags = oflags;
 +  }
 +  if (error) { *end = ret = NULL; }
 +  return ret;
 +}
 +
 +// look behind should interpret everything as a literal (except \\) since the
 +// pattern must have a concrete length
 +NFANode * Pattern::parseBehind(const bool pos, NFANode ** end)
 +{
 +  bkstring t = "";
 +  while (curInd < (int)pattern.size() && pattern[curInd] != ')')
 +  {
 +    char ch = pattern[curInd++];
 +    t += " ";
 +    if (ch == '\\')
 +    {
 +      if (curInd + 1 >= (int)pattern.size())
 +      {
 +        raiseError();
 +        return *end = registerNode(new NFACharNode(' '));
 +      }
 +      ch = pattern[curInd++];
 +    }
 +    t[t.size() - 1] = ch;
 +  }
 +  if (curInd >= (int)pattern.size() || pattern[curInd] != ')') raiseError();
 +  else ++curInd;
 +  return *end = registerNode(new NFALookBehindNode(t, pos));
 +}
 +NFANode * Pattern::parseQuote()
 +{
 +  bool done = 0;
 +  bkstring s = "";
 +
 +  while (!done)
 +  {
 +    if      (curInd >= (int)pattern.size())
 +    {
 +      raiseError();
 +      done = 1;
 +    }
 +    else if (pattern.substr(curInd, 2) == "\\E")
 +    {
 +      curInd += 2;
 +      done = 1;
 +    }
 +    else if (pattern[curInd] == '\\')
 +    {
 +      s += " ";
 +      s[s.size() - 1] = pattern[++curInd];
 +      ++curInd;
 +    }
 +    else
 +    {
 +      s += " ";
 +      s[s.size() - 1] = pattern[curInd++];
 +    }
 +  }
 +  if ((flags & Pattern::CASE_INSENSITIVE) != 0) return registerNode(new NFACIQuoteNode(s));
 +  return registerNode(new NFAQuoteNode(s));
 +}
 +NFANode * Pattern::parse(const bool inParen, const bool inOr, NFANode ** end)
 +{
 +  NFANode * start, * cur, * next = NULL;
 +  bkstring t;
 +  int grc = groupCount++;
 +  bool inv, quo;
 +  bool ahead = 0, pos = 0, noncap = 0, indep = 0;
 +  unsigned long oldFlags = flags;
 +
 +  if (inParen)
 +  {
 +    if (pattern[curInd] == '?')
 +    {
 +      ++curInd;
 +      --groupCount;
 +      if      (pattern[curInd]           == ':')  { noncap = 1; ++curInd;     grc = --nonCapGroupCount; }
 +      else if (pattern[curInd]           == '=')  { ++curInd;     ahead = 1;  pos = 1;                  }
 +      else if (pattern[curInd]           == '!')  { ++curInd;     ahead = 1;  pos = 0;                  }
 +      else if (pattern.substr(curInd, 2) == "<=") { curInd += 2;  return parseBehind(1, end);           }
 +      else if (pattern.substr(curInd, 2) == "<!") { curInd += 2;  return parseBehind(0, end);           }
 +      else if (pattern[curInd]           == '>')  { ++curInd;     indep = 1;                            }
 +      else
 +      {
 +        bool negate = false, done = false;
 +        while (!done)
 +        {
 +          if (curInd >= (int)pattern.size())
 +          {
 +            raiseError();
 +            return NULL;
 +          }
 +          else if (negate)
 +          {
 +            switch (pattern[curInd])
 +            {
 +            case 'i': flags &= ~Pattern::CASE_INSENSITIVE;    break;
 +            case 'd': flags &= ~Pattern::UNIX_LINE_MODE;      break;
 +            case 'm': flags &= ~Pattern::MULTILINE_MATCHING;  break;
 +            case 's': flags &= ~Pattern::DOT_MATCHES_ALL;     break;
 +            case ':': done = true;                            break;
 +            case ')':
 +              ++curInd;
 +              *end = registerNode(new NFALookBehindNode("", true));
 +              return *end;
 +            case '-':
 +            default: raiseError(); return NULL;
 +            }
 +          }
 +          else
 +          {
 +            switch (pattern[curInd])
 +            {
 +            case 'i': flags |= Pattern::CASE_INSENSITIVE;     break;
 +            case 'd': flags |= Pattern::UNIX_LINE_MODE;       break;
 +            case 'm': flags |= Pattern::MULTILINE_MATCHING;   break;
 +            case 's': flags |= Pattern::DOT_MATCHES_ALL;      break;
 +            case ':': done = true;                            break;
 +            case '-': negate = true;                          break;
 +            case ')':
 +              ++curInd;
 +              *end = registerNode(new NFALookBehindNode("", true));
 +              return *end;
 +            default:  raiseError(); return NULL;
 +            }
 +          }
 +          ++curInd;
 +        }
 +        noncap = 1;
 +        grc = --nonCapGroupCount;
 +      }
 +      if (noncap) cur = start = registerNode(new NFAGroupHeadNode(grc));
 +      else        cur = start = registerNode(new NFASubStartNode);
 +    }
 +    else cur = start = registerNode(new NFAGroupHeadNode(grc));
 +  }
 +  else cur = start = registerNode(new NFASubStartNode);
 +  while (curInd < (int)pattern.size())
 +  {
 +    char ch = pattern[curInd++];
 +
 +    next = NULL;
 +    if (error) return NULL;
 +    switch (ch)
 +    {
 +    case '^':
 +      if ((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineNode);
 +      else                                            next = registerNode(new NFAStartOfInputNode);
 +      break;
 +    case '$':
 +      if ((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineNode);
 +      else                                            next = registerNode(new NFAEndOfInputNode(0));
 +      break;
 +    case '|':
 +      --groupCount;
 +      cur->next = registerNode(new NFAAcceptNode);
 +      cur = start = registerNode(new NFAOrNode(start, parse(inParen, 1)));
 +      break;
 +    case '\\':
 +      if      (curInd < (int)pattern.size())
 +      {
 +        bool eoi = 0;
 +        switch (pattern[curInd])
 +        {
 +        case '1':
 +        case '2':
 +        case '3':
 +        case '4':
 +        case '5':
 +        case '6':
 +        case '7':
 +        case '8':
 +        case '9': next = parseBackref(); break;
 +        case 'A': ++curInd; next = registerNode(new NFAStartOfInputNode);     break;
 +        case 'B': ++curInd; next = registerNode(new NFAWordBoundaryNode(0));  break;
 +        case 'b': ++curInd; next = registerNode(new NFAWordBoundaryNode(1));  break;
 +        case 'G': ++curInd; next = registerNode(new NFAEndOfMatchNode);       break;
 +        case 'Z': eoi = 1;
 +        case 'z': ++curInd; next = registerNode(new NFAEndOfInputNode(eoi));  break;
 +        default:
 +          t = parseEscape(inv, quo);
 +          if (!quo)
 +          {
 +            if (t.size() > 1 || inv)
 +            {
 +              if ((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassNode(t, inv));
 +              else                                          next = registerNode(new NFAClassNode(t, inv));
 +            }
 +            else
 +            {
 +              next = registerNode(new NFACharNode(t[0]));
 +            }
 +          }
 +          else
 +          {
 +            next = parseQuote();
 +          }
 +        }
 +      }
 +      else raiseError();
 +      break;
 +    case '[':
 +      if ((flags & Pattern::CASE_INSENSITIVE) == 0)
 +      {
 +        NFAClassNode * clazz = new NFAClassNode();
 +        bkstring s = parseClass();
 +        for (int i = 0; i < (int)s.size(); ++i) clazz->vals[s[i]] = 1;
 +        next = registerNode(clazz);
 +      }
 +      else
 +      {
 +        NFACIClassNode * clazz = new NFACIClassNode();
 +        bkstring s = parseClass();
 +        for (int i = 0; i < (int)s.size(); ++i) clazz->vals[to_lower(s[i])] = 1;
 +        next = registerNode(clazz);
 +      }
 +      break;
 +    case '.':
 +      {
 +        bool useN = 1, useR = 1;
 +        NFAClassNode * clazz = new NFAClassNode(1);
 +        if ((flags & Pattern::UNIX_LINE_MODE)  != 0) useR = 0;
 +        if ((flags & Pattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
 +        if (useN) clazz->vals['\n'] = 1;
 +        if (useR) clazz->vals['\r'] = 1;
 +        next = registerNode(clazz);
 +      }
 +      break;
 +    case '(':
 +      {
 +        NFANode * end, * t1, * t2;
 +        t1 = parse(1, 0, &end);
 +        if (!t1) raiseError();
 +        else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL)
 +        {
 +          cur->next = t2;
 +          cur = t2->next;
 +        }
 +        else
 +        {
 +          cur->next = t1;
 +          cur = end;
 +        }
 +      }
 +      break;
 +    case ')':
 +      if (!inParen) raiseError();
 +      else if (inOr)
 +      {
 +        --curInd;
 +        cur = cur->next = registerNode(new NFAAcceptNode);
 +        flags = oldFlags;
 +        return start;
 +      }
 +      else
 +      {
 +        if (ahead)
 +        {
 +          cur = cur->next = registerNode(new NFAAcceptNode);
 +          flags = oldFlags;
 +          return *end = registerNode(new NFALookAheadNode(start, pos));
 +        }
 +        else if (indep)
 +        {
 +          cur = cur->next = registerNode(new NFAAcceptNode);
 +          flags = oldFlags;
 +          return *end = registerNode(new NFAPossessiveQuantifierNode(this, start, 1, 1));
 +        }
 +        else // capping or noncapping, it doesnt matter
 +        {
 +          *end = cur = cur->next = registerNode(new NFAGroupTailNode(grc));
 +          next = quantifyGroup(start, *end, grc);
 +          if (next)
 +          {
 +            start = next;
 +            *end = next->next;
 +          }
 +          flags = oldFlags;
 +          return start;
 +        }
 +      }
 +      break;
 +    case '{': // registered pattern
 +      cur->next = parseRegisteredPattern(&next);
 +      if (cur->next) cur = next;
 +      break;
 +    case '*':
 +    case '+':
 +    case '?':
 +//    case '}':
 +//    case ']':
 +      raiseError();
 +      break;
 +    default:
 +      if ((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharNode(ch));
 +      else                                          next = registerNode(new NFACharNode(ch));
 +      break;
 +    }
 +    if (next)
 +    {
 +      cur = cur->next = quantify(next);
 +    }
 +  }
 +  if (inParen) raiseError();
 +  else
 +  {
 +    if (inOr) cur = cur->next = registerNode(new NFAAcceptNode);
 +    if (end) *end = cur;
 +  }
 +
 +  flags = oldFlags;
 +  if (error) return NULL;
 +
 +  return start;
 +}
 +
 +Pattern * Pattern::compile(const bkstring & pattern, const unsigned long mode)
 +{
 +  Pattern * p = new Pattern(pattern);
 +  NFANode * end;
 +
 +  p->flags = mode;
 +  if ((mode & Pattern::LITERAL) != 0)
 +  {
 +    p->head = p->registerNode(new NFAStartNode);
 +    if ((mode & Pattern::CASE_INSENSITIVE) != 0)  p->head->next = p->registerNode(new NFACIQuoteNode(pattern));
 +    else                                          p->head->next = p->registerNode(new NFAQuoteNode(pattern));
 +    p->head->next->next = p->registerNode(new NFAEndNode);
 +  }
 +  else
 +  {
 +    p->head = p->parse(0, 0, &end);
 +    if (!p->head)
 +    {
 +      delete p;
 +      p = NULL;
 +    }
 +    else
 +    {
 +      if (!(p->head && p->head->isStartOfInputNode()))
 +      {
 +        NFANode * n = p->registerNode(new NFAStartNode);
 +        n->next = p->head;
 +        p->head = n;
 +      }
 +      end->next = p->registerNode(new NFAEndNode);
 +    }
 +  }
 +  if (p != NULL)
 +  {
 +    p->matcher = new Matcher(p, "");
 +  }
 +
 +  return p;
 +}
 +
 +Pattern * Pattern::compileAndKeep(const bkstring & pattern, const unsigned long mode)
 +{
 +  Pattern * ret = NULL;
 +  std::map<bkstring, Pattern*>::iterator it = compiledPatterns.find(pattern);
 +
 +  if (it != compiledPatterns.end())
 +  {
 +    ret = it->second;
 +  }
 +  else
 +  {
 +    ret = compile(pattern, mode);
 +    compiledPatterns[pattern] = ret;
 +  }
 +
 +  return ret;
 +}
 +bkstring Pattern::replace(const bkstring & pattern, const bkstring & str,
 +                             const bkstring & replacementText, const unsigned long mode)
 +{
 +  bkstring ret;
 +  Pattern * p = Pattern::compile(pattern, mode);
 +  if (p)
 +  {
 +    ret = p->replace(str, replacementText);
 +    delete p;
 +  }
 +  return ret;
 +}
 +
 +std::vector<bkstring> Pattern::split(const bkstring & pattern, const bkstring & str, const bool keepEmptys,
 +                              const unsigned long limit, const unsigned long mode)
 +{
 +  std::vector<bkstring> ret;
 +  Pattern * p = Pattern::compile(pattern, mode);
 +  if (p)
 +  {
 +    ret = p->split(str, keepEmptys, limit);
 +    delete p;
 +  }
 +  return ret;
 +}
 +
 +std::vector<bkstring> Pattern::findAll(const bkstring & pattern, const bkstring & str, const unsigned long mode)
 +{
 +  std::vector<bkstring> ret;
 +  Pattern * p = Pattern::compile(pattern, mode);
 +  if (p)
 +  {
 +    ret = p->findAll(str);
 +    delete p;
 +  }
 +  return ret;
 +}
 +
 +bool Pattern::matches(const bkstring & pattern, const bkstring & str, const unsigned long mode)
 +{
 +  bool ret = 0;
 +  Pattern * p = compile(pattern, mode);
 +
 +  if (p)
 +  {
 +    ret = p->matches(str);
 +    delete p;
 +  }
 +
 +  return ret;
 +}
 +
 +bool Pattern::registerPattern(const bkstring & name, const bkstring & pattern, const unsigned long mode)
 +{
 +  Pattern * p = Pattern::compile(pattern, mode);
 +  if (!p) return 0;
 +  Pattern::registeredPatterns[name] = std::make_pair(pattern, mode);
 +  delete p;
 +  return 1;
 +}
 +
 +void Pattern::unregisterPatterns()
 +{
 +  registeredPatterns.clear();
 +}
 +void Pattern::clearPatternCache()
 +{
 +  std::map<bkstring, Pattern*>::iterator it;
 +  for (it = compiledPatterns.begin(); it != compiledPatterns.end(); ++it)
 +  {
 +    delete it->second;
 +  }
 +  compiledPatterns.clear();
 +}
 +
 +std::pair<bkstring, int>  Pattern::findNthMatch(const bkstring & pattern, const bkstring & str,
 +                                         const int matchNum, const unsigned long mode)
 +{
 +  std::pair<bkstring, int> ret;
 +  Pattern * p = Pattern::compile(pattern, mode);
 +
 +  ret.second = -1;
 +  if (p)
 +  {
 +    int i = -1;
 +    p->matcher->setString(str);
 +    while (i < matchNum && p->matcher->findNextMatch()) { ++i; }
 +    if (i == matchNum && p->matcher->getStartingIndex() >= 0)
 +    {
 +      ret.first = p->matcher->getGroup(0);
 +      ret.second = p->matcher->getStartingIndex();
 +    }
 +    delete p;
 +  }
 +
 +  return ret;
 +}
 +
 +Pattern::~Pattern()
 +{
 +  if (matcher) delete matcher;
 +  for (std::map<NFANode*, bool>::iterator it = nodes.begin(); it != nodes.end(); ++it)
 +  {
 +    delete it->first;
 +  }
 +}
 +bkstring Pattern::replace(const bkstring & str, const bkstring & replacementText)
 +{
 +  int li = 0;
 +  bkstring ret = "";
 +
 +  matcher->setString(str);
 +  while (matcher->findNextMatch())
 +  {
 +    ret += str.substr(li, matcher->getStartingIndex() - li);
 +    ret += matcher->replaceWithGroups(replacementText);
 +    li = matcher->getEndingIndex();
 +  }
 +  ret += str.substr(li);
 +
 +  return ret;
 +}
 +std::vector<bkstring> Pattern::split(const bkstring & str, const bool keepEmptys, const unsigned long limit)
 +{
 +  unsigned long lim = (limit == 0 ? MAX_QMATCH : limit);
 +  int li = 0;
 +  std::vector<bkstring> ret;
 +
 +  matcher->setString(str);
 +
 +  while (matcher->findNextMatch() && ret.size() < lim)
 +  {
 +    if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back("");
 +    if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys)
 +    {
 +      if (li != matcher->getStartingIndex() || keepEmptys)
 +      {
 +        ret.push_back(str.substr(li, matcher->getStartingIndex() - li));
 +      }
 +      li = matcher->getEndingIndex();
 +    }
 +  }
 +  if (li < (int)str.size()) ret.push_back(str.substr(li));
 +
 +  return ret;
 +}
 +std::vector<bkstring> Pattern::findAll(const bkstring & str)
 +{
 +  matcher->setString(str);
 +  return matcher->findAll();
 +}
 +bool Pattern::matches(const bkstring & str)
 +{
 +  matcher->setString(str);
 +  return matcher->matches();
 +}
 +unsigned long Pattern::getFlags() const
 +{
 +  return flags;
 +}
 +bkstring Pattern::getPattern() const
 +{
 +  return pattern;
 +}
 +Matcher * Pattern::createMatcher(const bkstring & str)
 +{
 +  return new Matcher(this, str);
 +}
 +
 +// NFANode
 +
 +NFANode::NFANode() { next = NULL; }
 +NFANode::~NFANode() { }
 +void NFANode::findAllNodes(std::map<NFANode*, bool> & soFar)
 +{
 +  if (soFar.find(this) == soFar.end()) return;
 +  soFar[this] = 1;
 +  if (next) next->findAllNodes(soFar);
 +}
 +
 +// NFACharNode
 +
 +NFACharNode::NFACharNode(const char c) { ch = c; }
 +int NFACharNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd < (int)str.size() && str[curInd] == ch) return next->match(str, matcher, curInd + 1);
 +  return -1;
 +}
 +
 +// NFACICharNode
 +
 +NFACICharNode::NFACICharNode(const char c) { ch = to_lower(c); }
 +int NFACICharNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd < (int)str.size() && to_lower(str[curInd]) == ch) return next->match(str, matcher, curInd + 1);
 +  return -1;
 +}
 +
 +// NFAStartNode
 +
 +NFAStartNode::NFAStartNode() { }
 +int NFAStartNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int ret = -1, ci = curInd;
 +
 +  matcher->starts[0] = curInd;
 +  if ((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) == (unsigned int)Matcher::MATCH_ENTIRE_STRING)
 +  {
 +    if (curInd != 0)
 +    {
 +      matcher->starts[0] = -1;
 +      return -1;
 +    }
 +    return next->match(str, matcher, 0);
 +  }
 +  while ((ret = next->match(str, matcher, ci)) == -1 && ci < (int)str.size())
 +  {
 +    matcher->clearGroups();
 +    matcher->starts[0] = ++ci;
 +  }
 +  if (ret < 0) matcher->starts[0] = -1;
 +  return ret;
 +}
 +
 +// NFAEndNode
 +
 +NFAEndNode::NFAEndNode() { }
 +int NFAEndNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  matcher->ends[0] = curInd;
 +  if ((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) != 0)
 +  {
 +    if (curInd == (int)str.size()) return curInd;
 +    matcher->ends[0] = -1;
 +    return -1;
 +  }
 +  return curInd;
 +}
 +
 +// NFAQuantifierNode
 +
 +void NFAQuantifierNode::findAllNodes(std::map<NFANode*, bool> & soFar)
 +{
 +  inner->findAllNodes(soFar);
 +  NFANode::findAllNodes(soFar);
 +}
 +NFAQuantifierNode::NFAQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
 +{
 +  inner = internal;
 +  inner->next = pat->registerNode(new NFAAcceptNode);
 +  min = (minMatch < Pattern::MIN_QMATCH) ? Pattern::MIN_QMATCH : minMatch;
 +  max = (maxMatch > Pattern::MAX_QMATCH) ? Pattern::MAX_QMATCH : maxMatch;
 +}
 +
 +int NFAQuantifierNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int i0, i1, i2 = 0;
 +
 +  i0 = i1 = curInd;
 +  while (i2 < min)
 +  {
 +
 +    ++i2;
 +    i1 = inner->match(str, matcher, i0);
 +    if (i1 <= i0) return i1; // i1 < i0 means i1 is -1
 +    i0 = i1;
 +  }
 +
 +  return i1;
 +}
 +// NFAGreedyQuantifierNode
 +
 +NFAGreedyQuantifierNode::NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
 +                        : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
 +int NFAGreedyQuantifierNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int t = NFAQuantifierNode::match(str, matcher, curInd);
 +  if (t != -1) return matchInternal(str, matcher, t, min);
 +  return t;
 +}
 +int NFAGreedyQuantifierNode::matchInternal(const bkstring & str, Matcher * matcher, const int curInd, const int soFar) const
 +{
 +  if (soFar >= max) return next->match(str, matcher, curInd);
 +
 +  int i, j;
 +
 +  i = inner->match(str, matcher, curInd);
 +  if (i != -1)
 +  {
 +    j = matchInternal(str, matcher, i, soFar + 1);
 +    if (j != -1) return j;
 +  }
 +  return next->match(str, matcher, curInd);
 +}
 +
 +// NFALazyQuantifierNode
 +
 +NFALazyQuantifierNode::NFALazyQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
 +                      : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
 +int NFALazyQuantifierNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int i, j, m = NFAQuantifierNode::match(str, matcher, curInd);
 +
 +  if (m == -1) return -1;
 +
 +  for (i = min; i < max; ++i)
 +  {
 +    j = next->match(str, matcher, m);
 +    if (j == -1)
 +    {
 +      j = inner->match(str, matcher, m);
 +      // if j < m, then j is -1, so we bail.
 +      // if j == m, then we would just go and call next->match on the same index,
 +      // but it already failed trying to match right there, so we know we can
 +      // just bail
 +      if (j <= m) return -1;
 +      m = j;
 +    }
 +    else return j;
 +  }
 +  return next->match(str, matcher, m);
 +}
 +
 +// NFAPossessiveQuantifierNode
 +
 +NFAPossessiveQuantifierNode::NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
 +                            : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
 +int NFAPossessiveQuantifierNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int i, j, m = NFAQuantifierNode::match(str, matcher, curInd);
 +
 +  if (m == -1) return -1;
 +  for (i = min; i < max; ++i)
 +  {
 +    j = inner->match(str, matcher, m);
 +    if (j <= m) return next->match(str, matcher, m);
 +    m = j;
 +  }
 +  return next->match(str, matcher, m);
 +}
 +
 +// NFAAcceptNode
 +
 +NFAAcceptNode::NFAAcceptNode() { }
 +int NFAAcceptNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (!next) return curInd;
 +  else return next->match(str, matcher, curInd);
 +}
 +
 +// NFAClassNode
 +
 +NFAClassNode::NFAClassNode(const bool invert)
 +{
 +  inv = invert;
 +}
 +NFAClassNode::NFAClassNode(const bkstring & clazz, const bool invert)
 +{
 +  inv = invert;
 +  for (int i = 0; i < (int)clazz.size(); ++i) vals[clazz[i]] = 1;
 +}
 +int NFAClassNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd < (int)str.size() && ((vals.find(str[curInd]) != vals.end()) ^ inv))
 +  {
 +    return next->match(str, matcher, curInd + 1);
 +  }
 +  return -1;
 +}
 +
 +// NFACIClassNode
 +
 +NFACIClassNode::NFACIClassNode(const bool invert)
 +{
 +  inv = invert;
 +}
 +NFACIClassNode::NFACIClassNode(const bkstring & clazz, const bool invert)
 +{
 +  inv = invert;
 +  for (int i = 0; i < (int)clazz.size(); ++i) vals[to_lower(clazz[i])] = 1;
 +}
 +int NFACIClassNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd < (int)str.size() && ((vals.find(to_lower(str[curInd])) != vals.end()) ^ inv))
 +  {
 +    return next->match(str, matcher, curInd + 1);
 +  }
 +  return -1;
 +}
 +
 +// NFASubStartNode
 +
 +NFASubStartNode::NFASubStartNode() { }
 +int NFASubStartNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  return next->match(str, matcher, curInd);
 +}
 +
 +// NFAOrNode
 +
 +NFAOrNode::NFAOrNode(NFANode * first, NFANode * second) : one(first), two(second) { }
 +void NFAOrNode::findAllNodes(std::map<NFANode*, bool> & soFar)
 +{
 +  if (one) one->findAllNodes(soFar);
 +  if (two) two->findAllNodes(soFar);
 +  NFANode::findAllNodes(soFar);
 +}
 +int NFAOrNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int ci = one->match(str, matcher, curInd);
 +
 +  if (ci != -1) ci = next->match(str, matcher, ci);
 +  if (ci != -1) return ci;
 +  if (ci == -1) ci = two->match(str, matcher, curInd);
 +  if (ci != -1) ci = next->match(str, matcher, ci);
 +  return ci;
 +}
 +
 +// NFAQuoteNode
 +
 +NFAQuoteNode::NFAQuoteNode(const bkstring & quoted) : qStr(quoted) { }
 +int NFAQuoteNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd + qStr.size() > str.size())       return -1;
 +  if (str.substr(curInd, qStr.size()) != qStr) return -1;
 +  return next->match(str, matcher, curInd + qStr.size());
 +}
 +
 +// NFACIQuoteNode
 +
 +NFACIQuoteNode::NFACIQuoteNode(const bkstring & quoted) : qStr(quoted) { }
 +int NFACIQuoteNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd + qStr.size() > str.size()) return -1;
 +  if (str_icmp(str.substr(curInd, qStr.size()).c_str(),  qStr.c_str())) return -1;
 +  return next->match(str, matcher, qStr.size());
 +}
 +
 +// NFALookAheadNode
 +
 +NFALookAheadNode::NFALookAheadNode(NFANode * internal, const bool positive) : NFANode(), pos(positive), inner(internal) { }
 +void NFALookAheadNode::findAllNodes(std::map<NFANode*, bool> & soFar)
 +{
 +  if (inner) inner->findAllNodes(soFar);
 +  NFANode::findAllNodes(soFar);
 +}
 +int NFALookAheadNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  return ((inner->match(str, matcher, curInd) == -1) ^ pos) ? next->match(str, matcher, curInd) : -1;
 +}
 +
 +// NFALookBehindNode
 +
 +NFALookBehindNode::NFALookBehindNode(const bkstring & str, const bool positive) : pos(positive), mStr(str) { }
 +int NFALookBehindNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (pos)
 +  {
 +    if (curInd < (int)mStr.size()) return -1;
 +    if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return next->match(str, matcher, curInd);
 +  }
 +  else
 +  {
 +    if (curInd < (int)mStr.size()) return next->match(str, matcher, curInd);
 +    if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return -1;
 +     return next->match(str, matcher, curInd);
 +  }
 +  return -1;
 +}
 +
 +// NFAStartOfLineNode
 +
 +NFAStartOfLineNode::NFAStartOfLineNode() { }
 +int NFAStartOfLineNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd == 0 || str[curInd - 1] == '\n' || str[curInd - 1] == '\r')
 +  {
 +    return next->match(str, matcher, curInd);
 +  }
 +  return -1;
 +}
 +
 +// NFAEndOfLineNode
 +
 +NFAEndOfLineNode::NFAEndOfLineNode() { }
 +int NFAEndOfLineNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd >= (int)str.size() || str[curInd] == '\n' || str[curInd] == '\r')
 +  {
 +    return next->match(str, matcher, curInd);
 +  }
 +  return -1;
 +}
 +
 +// NFAReferenceNode
 +
 +NFAReferenceNode::NFAReferenceNode(const int groupIndex) : gi(groupIndex) { }
 +int NFAReferenceNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int len = matcher->ends[gi] - matcher->starts[gi];
 +  int ni = -1;
 +  if      (gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0)   ni = curInd;
 +  else if (curInd + len > (int)str.size())                                  return -1;
 +  else if (str.substr(curInd, len) != str.substr(matcher->starts[gi], len)) return -1;
 +  else                                                                      ni = curInd + len;
 +
 +  return next->match(str, matcher, ni);
 +}
 +
 +// NFAStartOfInputNode
 +
 +NFAStartOfInputNode::NFAStartOfInputNode() { }
 +int NFAStartOfInputNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd == 0) return next->match(str, matcher, curInd);
 +  return -1;
 +}
 +
 +// NFAEndOfInputNode
 +
 +NFAEndOfInputNode::NFAEndOfInputNode(const bool lookForTerm) : term(lookForTerm) { }
 +int NFAEndOfInputNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int len = (int)str.size();
 +  if      (curInd == len) return next->match(str, matcher, curInd);
 +  else if (term)
 +  {
 +    if      (curInd == len - 1 && (str[curInd] == '\r' || str[curInd] == '\n'))
 +    {
 +      return next->match(str, matcher, curInd);
 +    }
 +    else if (curInd == len - 2 && str.substr(curInd, 2) == "\r\n")
 +    {
 +      return next->match(str, matcher, curInd);
 +    }
 +  }
 +  return -1;
 +}
 +
 +// NFAWordBoundaryNode
 +
 +NFAWordBoundaryNode::NFAWordBoundaryNode(const bool positive) : pos(positive) { }
 +int NFAWordBoundaryNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int len = (int)str.size();
 +
 +  char c1 = (curInd - 1 < len && curInd > 0) ? str[curInd - 1] : '\n';
 +  char c2 = (curInd     < len)               ? str[curInd    ] : '\n';
 +
 +  if      (curInd == len) return next->match(str, matcher, curInd);
 +  bool ok = is_alpha(c1) != is_alpha(c2);
 +  if (ok && pos) return next->match(str, matcher, curInd);
 +  return -1;
 +}
 +
 +// NFAEndOfMatchNode
 +
 +NFAEndOfMatchNode::NFAEndOfMatchNode() { }
 +int NFAEndOfMatchNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  if (curInd == matcher->lm) return next->match(str, matcher, curInd);
 +  return -1;
 +}
 +
 +// NFAGroupHeadNode
 +
 +NFAGroupHeadNode::NFAGroupHeadNode(const int groupIndex) : gi(groupIndex) { }
 +int NFAGroupHeadNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int ret, o = matcher->starts[gi];
 +
 +  matcher->starts[gi] = curInd;
 +  ret = next->match(str, matcher, curInd);
 +  if (ret < 0) matcher->starts[gi] = o;
 +
 +  return ret;
 +}
 +
 +// NFAGroupTailNode
 +
 +NFAGroupTailNode::NFAGroupTailNode(const int groupIndex) : gi(groupIndex) { }
 +int NFAGroupTailNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int ret, o = matcher->ends[gi];
 +
 +  matcher->ends[gi] = curInd;
 +  ret = next->match(str, matcher, curInd);
 +  if (ret < 0) matcher->ends[gi] = o;
 +
 +  return ret;
 +}
 +
 +// NFAGroupLoopPrologueNode
 +
 +NFAGroupLoopPrologueNode::NFAGroupLoopPrologueNode(const int groupIndex) : gi(groupIndex) { }
 +int NFAGroupLoopPrologueNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi];
 +
 +  matcher->groups[gi] = 0;
 +  matcher->groupPos[gi] = 0;
 +  matcher->groupIndeces[gi] = -1;
 +  ret = next->match(str, matcher, curInd);
 +  if (ret < 0)
 +  {
 +    matcher->groups[gi] = o1;
 +    matcher->groupPos[gi] = o2;
 +    matcher->groupIndeces[gi] = o3;
 +  }
 +
 +  return ret;
 +}
 +
 +// NFAGroupLoopNode
 +
 +NFAGroupLoopNode::NFAGroupLoopNode(NFANode * internal, const int minMatch, const int maxMatch,
 +                                   const int groupIndex, const int matchType)
 +{
 +  inner = internal;
 +  min = minMatch;
 +  max = maxMatch;
 +  gi = groupIndex;
 +  type = matchType;
 +}
 +void NFAGroupLoopNode::findAllNodes(std::map<NFANode*, bool> & soFar)
 +{
 +  if (inner) inner->findAllNodes(soFar);
 +  NFANode::findAllNodes(soFar);
 +}
 +int NFAGroupLoopNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  bool b = (curInd > matcher->groupIndeces[gi]);
 +
 +  if (b && matcher->groups[gi] < min)
 +  {
 +    ++matcher->groups[gi];
 +    int o = matcher->groupIndeces[gi];
 +    matcher->groupIndeces[gi] = curInd;
 +    int ret = inner->match(str, matcher, curInd);
 +    if (ret < 0)
 +    {
 +      matcher->groupIndeces[gi] = o;
 +      --matcher->groups[gi];
 +    }
 +    return ret;
 +  }
 +  else if (!b || matcher->groups[gi] >= max)
 +  {
 +    return next->match(str, matcher, curInd);
 +  }
 +  else
 +  {
 +    switch (type)
 +    {
 +    case 0: return matchGreedy(str, matcher, curInd);
 +    case 1: return matchLazy(str, matcher, curInd);
 +    case 2: return matchPossessive(str, matcher, curInd);
 +    }
 +  }
 +  return -1;
 +}
 +int NFAGroupLoopNode::matchGreedy(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int o = matcher->groupIndeces[gi];            // save our info for backtracking
 +  matcher->groupIndeces[gi] = curInd;           // move along
 +  ++matcher->groups[gi];
 +  int ret = inner->match(str, matcher, curInd); // match internally
 +  if (ret < 0)
 +  {                                             // if we failed, then restore info and match next
 +    --matcher->groups[gi];
 +    matcher->groupIndeces[gi] = o;
 +    ret = next->match(str, matcher, curInd);
 +  }
 +  return ret;
 +}
 +int NFAGroupLoopNode::matchLazy(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int ret = next->match(str, matcher, curInd);  // be lazy, just go on
 +  if (ret < 0)
 +  {
 +    int o = matcher->groupIndeces[gi];          // save info for backtracking
 +    matcher->groupIndeces[gi] = curInd;         // advance our position
 +    ++matcher->groups[gi];
 +    ret = inner->match(str, matcher, curInd);   // match our internal stuff
 +    if (ret < 0)                                // if we failed, then restore the info
 +    {
 +      --matcher->groups[gi];
 +      matcher->groupIndeces[gi] = o;
 +    }
 +  }
 +  return ret;
 +}
 +int NFAGroupLoopNode::matchPossessive(const bkstring & str, Matcher * matcher, const int curInd) const
 +{
 +  int o = matcher->groupIndeces[gi];            // save info for backtracking
 +  matcher->groupPos[gi] = matcher->groups[gi];  // set a flag stating we have matcher at least this much
 +  matcher->groupIndeces[gi] = curInd;           // move along
 +  ++matcher->groups[gi];
 +  int ret = inner->match(str, matcher, curInd); // try and match again
 +  if (ret < 0)
 +  {                                             // if we fail, back off, but to an extent
 +    --matcher->groups[gi];
 +    matcher->groupIndeces[gi] = o;
 +    if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd);
 +  }
 +  return ret;
 +}
 +
 +#ifdef _WIN32
 +  #pragma warning(pop)
 +#endif
 diff --git a/plugins/SmileyAdd/src/regexp/Pattern.h b/plugins/SmileyAdd/src/regexp/Pattern.h new file mode 100644 index 0000000000..bb16ad90fa --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/Pattern.h @@ -0,0 +1,1663 @@ +#ifndef __PATTERN_H__
 +#define __PATTERN_H__
 +
 +#ifdef _WIN32
 +  #pragma warning(disable:4786)
 +#endif
 +
 +#include <vector>
 +#include <map>
 +
 +#include "bkstring.h"
 +
 +class Matcher;
 +class NFANode;
 +class NFAQuantifierNode;
 +
 +/**
 +  This pattern class is very similar in functionality to Java's
 +  java.util.regex.Pattern class. The pattern class represents an immutable
 +  regular expression object. Instead of having a single object contain both the
 +  regular expression object and the matching object, instead the two objects are
 +  split apart. The {@link Matcher Matcher} class represents the maching
 +  object.
 +
 +  The Pattern class works primarily off of "compiled" patterns. A typical
 +  instantiation of a regular expression looks like:
 +
 +  <pre>
 +  Pattern * p = Pattern::compile("a*b");
 +  Matcher * m = p->createMatcher("aaaaaab");
 +  if (m->matches()) ...
 +  </pre>
 +
 +  However, if you do not need to use a pattern more than once, it is often times
 +  okay to use the Pattern's static methods insteads. An example looks like this:
 +
 +  <pre>
 +  if (Pattern::matches("a*b", "aaaab")) { ... }
 +  </pre>
 +
 +  This class does not currently support unicode. The unicode update for this
 +  class is coming soon.
 +
 +  This class is partially immutable. It is completely safe to call createMatcher
 +  concurrently in different threads, but the other functions (e.g. split) should
 +  not be called concurrently on the same <code>Pattern</code>.
 +
 +  <table border="0" cellpadding="1" cellspacing="0">
 +    <tr align="left" bgcolor="#CCCCFF">
 +      <td>
 +        <b>Construct</b>
 +      </td>
 +      <td>
 +        <b>Matches</b>
 +      </th>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +       
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Characters</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x</i></code>
 +      </td>
 +      <td>
 +        The character <code><i>x</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\\</code>
 +      </td>
 +      <td>
 +        The character <code>\</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\0<i>nn</i></code>
 +      </td>
 +      <td>
 +        The character with octal ASCII value <code><i>nn</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\0<i>nnn</i></code>
 +      </td>
 +      <td>
 +        The character with octal ASCII value <code><i>nnn</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\x<i>hh</i></code>
 +      </td>
 +      <td>
 +        The character with hexadecimal ASCII value <code><i>hh</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\t</code>
 +      </td>
 +      <td>
 +        A tab character
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\r</code>
 +      </td>
 +      <td>
 +        A carriage return character
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\n</code>
 +      </td>
 +      <td>
 +        A new-line character
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <b>Character Classes</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[abc]</code>
 +      </td>
 +      <td>
 +        Either <code>a</code>, <code>b</code>, or <code>c</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[^abc]</code>
 +      </td>
 +      <td>
 +        Any character but <code>a</code>, <code>b</code>, or <code>c</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a-zA-Z]</code>
 +      </td>
 +      <td>
 +        Any character ranging from <code>a</code> thru <code>z</code>, or
 +        <code>A</code> thru <code>Z</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[^a-zA-Z]</code>
 +      </td>
 +      <td>
 +        Any character except those ranging from <code>a</code> thru
 +        <code>z</code>, or <code>A</code> thru <code>Z</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a\-z]</code>
 +      </td>
 +      <td>
 +        Either <code>a</code>, <code>-</code>, or <code>z</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a-z[A-Z]]</code>
 +      </td>
 +      <td>
 +        Same as <code>[a-zA-Z]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a-z&&[g-i]]</code>
 +      </td>
 +      <td>
 +        Any character in the intersection of <code>a-z</code> and
 +        <code>g-i</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a-z&&[^g-i]]</code>
 +      </td>
 +      <td>
 +        Any character in <code>a-z</code> and not in <code>g-i</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Prefefined character classes</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><b>.</b></code>
 +      </td>
 +      <td>
 +        Any character. Multiline matching must be compiled into the pattern for
 +        <code><b>.</b></code> to match a <code>\r</code> or a <code>\n</code>.
 +        Even if multiline matching is enabled, <code><b>.</b></code> will not
 +        match a <code>\r\n</code>, only a <code>\r</code> or a <code>\n</code>.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\d</code>
 +      </td>
 +      <td>
 +        <code>[0-9]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\D</code>
 +      </td>
 +      <td>
 +        <code>[^\d]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\s</code>
 +      </td>
 +      <td>
 +        <code>[ \t\r\n\x0B]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\S</code>
 +      </td>
 +      <td>
 +        <code>[^\s]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\w</code>
 +      </td>
 +      <td>
 +        <code>[a-zA-Z0-9_]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\W</code>
 +      </td>
 +      <td>
 +        <code>[^\w]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>POSIX character classes
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Lower}</code>
 +      </td>
 +      <td>
 +        <code>[a-z]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Upper}</code>
 +      </td>
 +      <td>
 +        <code>[A-Z]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{ASCII}</code>
 +      </td>
 +      <td>
 +        <code>[\x00-\x7F]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Alpha}</code>
 +      </td>
 +      <td>
 +        <code>[a-zA-Z]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Digit}</code>
 +      </td>
 +      <td>
 +        <code>[0-9]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Alnum}</code>
 +      </td>
 +      <td>
 +        <code>[\w&&[^_]]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Punct}</code>
 +      </td>
 +      <td>
 +        <code>[!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{XDigit}</code>
 +      </td>
 +      <td>
 +        <code>[a-fA-F0-9]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Boundary Matches</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>^</code>
 +      </td>
 +      <td>
 +        The beginning of a line. Also matches the beginning of input.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>$</code>
 +      </td>
 +      <td>
 +        The end of a line. Also matches the end of input.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\b</code>
 +      </td>
 +      <td>
 +        A word boundary
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\B</code>
 +      </td>
 +      <td>
 +        A non word boundary
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\A</code>
 +      </td>
 +      <td>
 +        The beginning of input
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\G</code>
 +      </td>
 +      <td>
 +        The end of the previous match. Ensures that a "next" match will only
 +        happen if it begins with the character immediately following the end of
 +        the "current" match.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\Z</code>
 +      </td>
 +      <td>
 +        The end of input. Will also match if there is a single trailing
 +        <code>\r\n</code>, a single trailing <code>\r</code>, or a single
 +        trailing <code>\n</code>.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\z</code>
 +      </td>
 +      <td>
 +        The end of input
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Greedy Quantifiers</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, either zero times or one time
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x*</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, zero or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, one or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n}</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, exactly n times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,}</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{,m}</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at most <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,m}</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times and at most
 +        <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Possessive Quantifiers</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x?+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, either zero times or one time
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x*+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, zero or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x++</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, one or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n}+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, exactly n times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,}+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{,m}+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at most <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,m}+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times and at most
 +        <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Reluctant Quantifiers</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x??</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, either zero times or one time
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x*?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, zero or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x+?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, one or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n}?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, exactly n times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,}?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{,m}?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at most <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,m}?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times and at most
 +        <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Operators</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>xy</i></code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code> then <code><i>y</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x</i></code>|<code><i>y</i></code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code> or <code><i>y</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code> as a capturing group
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Quoting</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\Q</code>
 +      </td>
 +      <td>
 +        Nothing, but treat every character (including \s) literally until a
 +        matching <code>\E</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\E</code>
 +      </td>
 +      <td>
 +        Nothing, but ends its matching <code>\Q</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Special Constructs</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?:<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, but not as a capturing group
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?=<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, via positive lookahead. This means that the
 +        expression will match only if it is trailed by <code><i>x</i></code>.
 +        It will not "eat" any of the characters matched by
 +        <code><i>x</i></code>.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?!<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, via negative lookahead. This means that the
 +        expression will match only if it is not trailed by
 +        <code><i>x</i></code>. It will not "eat" any of the characters
 +        matched by <code><i>x</i></code>.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?<=<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, via positive lookbehind. <code><i>x</i></code>
 +        cannot contain any quantifiers.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?<!<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, via negative lookbehind. <code><i>x</i></code>
 +        cannot contain any quantifiers.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?><i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i>{1}+</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Registered Expression Matching</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>{<i>x</i>}</code>
 +      </td>
 +      <td>
 +        The registered pattern <code><i>x</i></code>
 +      </td>
 +    </tr>
 +  </table>
 +
 +  <hr>
 +
 +   <i>Begin Text Extracted And Modified From java.util.regex.Pattern documentation</i>
 +
 +   <h4> Backslashes, escapes, and quoting </h4>
 +
 +   <p> The backslash character (<tt>'\'</tt>) serves to introduce escaped
 +   constructs, as defined in the table above, as well as to quote characters
 +   that otherwise would be interpreted as unescaped constructs.  Thus the
 +   expression <tt>\\</tt> matches a single backslash and <tt>\{</tt> matches a
 +   left brace.
 +
 +   <p> It is an error to use a backslash prior to any alphabetic character that
 +   does not denote an escaped construct; these are reserved for future
 +   extensions to the regular-expression language.  A backslash may be used
 +   prior to a non-alphabetic character regardless of whether that character is
 +   part of an unescaped construct.
 +
 +   <p>It is necessary to double backslashes in string literals that represent
 +   regular expressions to protect them from interpretation by a compiler.  The
 +   string literal <tt>"\b"</tt>, for example, matches a single backspace
 +   character when interpreted as a regular expression, while
 +   <tt>"\\b"</tt> matches a word boundary.  The string litera
 +    <tt>"\(hello\)"</tt> is illegal and leads to a compile-time error;
 +    in order to match the string <tt>(hello)</tt> the string literal
 +    <tt>"\\(hello\\)"</tt> must be used.
 +
 +   <h4> Character Classes </h4>
 +
 +      <p> Character classes may appear within other character classes, and
 +      may be composed by the union operator (implicit) and the intersection
 +      operator (<tt>&&</tt>).
 +      The union operator denotes a class that contains every character that is
 +      in at least one of its operand classes.  The intersection operator
 +      denotes a class that contains every character that is in both of its
 +      operand classes.
 +
 +      <p> The precedence of character-class operators is as follows, from
 +      highest to lowest:
 +
 +      <blockquote><table border="0" cellpadding="1" cellspacing="0"
 +                   summary="Precedence of character class operators.">
 +
 +        <tr><th>1    </th>
 +            <td>Literal escape    </td>
 +            <td><tt>\x</tt></td></tr>
 +        <tr><th>2    </th>
 +            <td>Range</td>
 +            <td><tt>a-z</tt></td></tr>
 +        <tr><th>3    </th>
 +            <td>Grouping</td>
 +            <td><tt>[...]</tt></td></tr>
 +        <tr><th>4    </th>
 +            <td>Intersection</td>
 +            <td><tt>[a-z&&[aeiou]]</tt></td></tr>
 +        <tr><th>5    </th>
 +            <td>Union</td>
 +            <td><tt>[a-e][i-u]<tt></td></tr>
 +      </table></blockquote>
 +
 +      <p> Note that a different set of metacharacters are in effect inside
 +      a character class than outside a character class. For instance, the
 +      regular expression <tt>.</tt> loses its special meaning inside a
 +      character class, while the expression <tt>-</tt> becomes a range
 +      forming metacharacter.
 +
 +   <a name="lt">
 +
 +   <a name="cg">
 +   <h4> Groups and capturing </h4>
 +
 +   <p> Capturing groups are numbered by counting their opening parentheses from
 +   left to right.  In the expression <tt>((A)(B(C)))</tt>, for example, there
 +   are four such groups: </p>
 +
 +   <blockquote><table cellpadding=1 cellspacing=0 summary="Capturing group numberings">
 +
 +   <tr><th>1    </th>
 +       <td><tt>((A)(B(C)))</tt></td></tr>
 +   <tr><th>2    </th>
 +       <td><tt>(A)</tt></td></tr>
 +   <tr><th>3    </th>
 +       <td><tt>(B(C))</tt></td></tr>
 +
 +   <tr><th>4    </th>
 +       <td><tt>(C)</tt></td></tr>
 +   </table></blockquote>
 +
 +   <p> Group zero always stands for the entire expression.
 +
 +   <p> Capturing groups are so named because, during a match, each subsequence
 +   of the input sequence that matches such a group is saved.  The captured
 +   subsequence may be used later in the expression, via a back reference, and
 +   may also be retrieved from the matcher once the match operation is complete.
 +
 +   <p> The captured input associated with a group is always the subsequence
 +   that the group most recently matched.  If a group is evaluated a second time
 +   because of quantification then its previously-captured value, if any, will
 +   be retained if the second evaluation fails.  Matching the string
 +   <tt>"aba"</tt> against the expression <tt>(a(b)?)+</tt>, for example, leaves
 +   group two set to <tt>"b"</tt>.  All captured input is discarded at the
 +   beginning of each match.
 +
 +   <p> Groups beginning with <tt>(?</tt> are pure, <i>non-capturing</i> groups
 +   that do not capture text and do not count towards the group total.
 +
 +
 +   <h4> Unicode support </h4>
 +
 +   <p> Coming Soon.
 +
 +   <h4> Comparison to Perl 5 </h4>
 +
 +   <p>The <code>Pattern</code> engine performs traditional NFA-based matching
 +   with ordered alternation as occurs in Perl 5.
 +
 +   <p> Perl constructs not supported by this class: </p>
 +
 +   <ul>
 +
 +      <li><p> The conditional constructs <tt>(?{</tt><i>X</i><tt>})</tt> and
 +      <tt>(?(</tt><i>condition</i><tt>)</tt><i>X</i><tt>|</tt><i>Y</i><tt>)</tt>,
 +      </p></li>
 +
 +      <li><p> The embedded code constructs <tt>(?{</tt><i>code</i><tt>})</tt>
 +      and <tt>(??{</tt><i>code</i><tt>})</tt>,</p></li>
 +
 +      <li><p> The embedded comment syntax <tt>(?#comment)</tt>, and </p></li>
 +
 +      <li><p> The preprocessing operations <tt>\l</tt> <tt>\u</tt>,
 +      <tt>\L</tt>, and <tt>\U</tt>.  </p></li>
 +
 +      <li><p> Embedded flags</p></li>
 +
 +   </ul>
 +
 +   <p> Constructs supported by this class but not by Perl: </p>
 +
 +   <ul>
 +
 +      <li><p> Possessive quantifiers, which greedily match as much as they can
 +      and do not back off, even when doing so would allow the overall match to
 +      succeed.  </p></li>
 +
 +      <li><p> Character-class union and intersection as described
 +      above.</p></li>
 +
 +   </ul>
 +
 +   <p> Notable differences from Perl: </p>
 +
 +   <ul>
 +
 +      <li><p> In Perl, <tt>\1</tt> through <tt>\9</tt> are always interpreted
 +      as back references; a backslash-escaped number greater than <tt>9</tt> is
 +      treated as a back reference if at least that many subexpressions exist,
 +      otherwise it is interpreted, if possible, as an octal escape.  In this
 +      class octal escapes must always begin with a zero. In this class,
 +      <tt>\1</tt> through <tt>\9</tt> are always interpreted as back
 +      references, and a larger number is accepted as a back reference if at
 +      least that many subexpressions exist at that point in the regular
 +      expression, otherwise the parser will drop digits until the number is
 +      smaller or equal to the existing number of groups or it is one digit.
 +      </p></li>
 +
 +      <li><p> Perl uses the <tt>g</tt> flag to request a match that resumes
 +      where the last match left off.  This functionality is provided implicitly
 +      by the <CODE>Matcher</CODE> class: Repeated invocations of the
 +      <code>find</code> method will resume where the last match left off,
 +      unless the matcher is reset.  </p></li>
 +
 +      <li><p> Perl is forgiving about malformed matching constructs, as in the
 +      expression <tt>*a</tt>, as well as dangling brackets, as in the
 +      expression <tt>abc]</tt>, and treats them as literals.  This
 +      class also strict and will not compile a pattern when dangling characters
 +      are encountered.</p></li>
 +
 +   </ul>
 +
 +
 +   <p> For a more precise description of the behavior of regular expression
 +   constructs, please see <a href="http://www.oreilly.com/catalog/regex2/">
 +   <i>Mastering Regular Expressions, 2nd Edition</i>, Jeffrey E. F. Friedl,
 +   O'Reilly and Associates, 2002.</a>
 +   </p>
 +  <P>
 +
 +  <i>End Text Extracted And Modified From java.util.regex.Pattern documentation</i>
 +
 +  <hr>
 +
 +  @author    Jeffery Stuart
 +  @since     March 2003, Stable Since November 2004
 +  @version   1.07.00
 +  @memo      A class used to represent "PERL 5"-ish regular expressions
 + */
 +class Pattern
 +{
 +  friend class Matcher;
 +  friend class NFANode;
 +  friend class NFAQuantifierNode;
 +  private:
 +    /**
 +      This constructor should not be called directly. Those wishing to use the
 +      Pattern class should instead use the {@link compile compile} method.
 +
 +      @param rhs The pattern to compile
 +      @memo Creates a new pattern from the regular expression in <code>rhs</code>.
 +     */
 +    Pattern(const bkstring & rhs);
 +  protected:
 +    /**
 +      This currently is not used, so don't try to do anything with it.
 +      @memo Holds all the compiled patterns for quick access.
 +     */
 +    static std::map<bkstring, Pattern *> compiledPatterns;
 +    /**
 +      Holds all of the registered patterns as strings. Due to certain problems
 +      with compilation of patterns, especially with capturing groups, this seemed
 +      to be the best way to do it.
 +     */
 +    static std::map<bkstring, std::pair<bkstring, unsigned long> > registeredPatterns;
 +  protected:
 +    /**
 +      Holds all the NFA nodes used. This makes deletion of a pattern, as well as
 +      clean-up from an unsuccessful compile much easier and faster.
 +     */
 +    std::map<NFANode*, bool> nodes;
 +    /**
 +      Used when methods like split are called. The matcher class uses a lot of
 +      dynamic memeory, so having an instance increases speedup of certain
 +      operations.
 +     */
 +    Matcher * matcher;
 +    /**
 +      The front node of the NFA.
 +     */
 +    NFANode * head;
 +    /**
 +      The actual regular expression we rerpesent
 +      */
 +    bkstring pattern;
 +    /**
 +      Flag used during compilation. Once the pattern is successfully compiled,
 +      <code>error</code> is no longer used.
 +     */
 +    bool error;
 +    /**
 +      Used during compilation to keep track of the current index into
 +      <code>{@link pattern pattern}<code>.  Once the pattern is successfully
 +      compiled, <code>error</code> is no longer used.
 +     */
 +    int curInd;
 +    /**
 +      The number of capture groups this contains.
 +     */
 +    int groupCount;
 +    /**
 +      The number of non-capture groups this contains.
 +     */
 +    int nonCapGroupCount;
 +    /**
 +      The flags specified when this was compiled.
 +     */
 +    unsigned long flags;
 +  protected:
 +    /**
 +      Raises an error during compilation. Compilation will cease at that point
 +      and compile will return <code>NULL</code>.
 +     */
 +    void raiseError();
 +    /**
 +      Convenience function for registering a node in <code>nodes</code>.
 +      @param node The node to register
 +      @return The registered node
 +     */
 +    NFANode * registerNode(NFANode * node);
 +
 +    /**
 +      Calculates the union of two strings. This function will first sort the
 +      strings and then use a simple selection algorithm to find the union.
 +      @param s1 The first "class" to union
 +      @param s2 The second "class" to union
 +      @return A new string containing all unique characters. Each character
 +              must have appeared in one or both of <code>s1</code> and
 +              <code>s2</code>.
 +     */
 +    bkstring classUnion      (bkstring s1, bkstring s2)  const;
 +    /**
 +      Calculates the intersection of two strings. This function will first sort
 +      the strings and then use a simple selection algorithm to find the
 +      intersection.
 +      @param s1 The first "class" to intersect
 +      @param s2 The second "class" to intersect
 +      @return A new string containing all unique characters. Each character
 +              must have appeared both <code>s1</code> and <code>s2</code>.
 +     */
 +    bkstring classIntersect  (bkstring s1, bkstring s2)  const;
 +    /**
 +      Calculates the negation of a string. The negation is the set of all
 +      characters between <code>\x00</code> and <code>\xFF</code> not
 +      contained in <code>s1</code>.
 +      @param s1 The "class" to be negated.
 +      @param s2 The second "class" to intersect
 +      @return A new string containing all unique characters. Each character
 +              must have appeared both <code>s1</code> and <code>s2</code>.
 +     */
 +    bkstring classNegate     (bkstring s1)                  const;
 +    /**
 +      Creates a new "class" representing the range from <code>low</code> thru
 +      <code>hi</code>. This function will wrap if <code>low</code> >
 +      <code>hi</code>. This is a feature, not a buf. Sometimes it is useful
 +      to be able to say [\x70-\x10] instead of [\x70-\x7F\x00-\x10].
 +      @param low The beginning character
 +      @param hi  The ending character
 +      @return A new string containing all the characters from low thru hi.
 +     */
 +    bkstring classCreateRange(char low,       char hi)         const;
 +
 +    /**
 +      Extracts a decimal number from the substring of member-variable
 +      <code>{@link pattern pattern}<code> starting at <code>start</code> and
 +      ending at <code>end</code>.
 +      @param start The starting index in <code>{@link pattern pattern}<code>
 +      @param end The last index in <code>{@link pattern pattern}<code>
 +      @return The decimal number in <code>{@link pattern pattern}<code>
 +     */
 +    int getInt(int start, int end);
 +    /**
 +      Parses a <code>{n,m}</code> string out of the member-variable
 +      <code>{@link pattern pattern}<code> stores the result in <code>sNum</code>
 +      and <code>eNum</code>.
 +      @param sNum Output parameter. The minimum number of matches required
 +                  by the curly quantifier are stored here.
 +      @param eNum Output parameter. The maximum number of matches allowed
 +                  by the curly quantifier are stored here.
 +      @return Success/Failure. Fails when the curly does not have the proper
 +              syntax
 +     */
 +    bool quantifyCurly(int & sNum, int & eNum);
 +    /**
 +      Tries to quantify the currently parsed group. If the group being parsed
 +      is indeed quantified in the member-variable
 +      <code>{@link pattern pattern}<code>, then the NFA is modified accordingly.
 +      @param start  The starting node of the current group being parsed
 +      @param stop   The ending node of the current group being parsed
 +      @param gn     The group number of the current group being parsed
 +      @return       The node representing the starting node of the group. If the
 +                    group becomes quantified, then this node is not necessarily
 +                    a GroupHead node.
 +     */
 +    NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn);
 +
 +    /**
 +      Tries to quantify the last parsed expression. If the character was indeed
 +      quantified, then the NFA is modified accordingly.
 +      @param newNode The recently created expression node
 +      @return The node representing the last parsed expression. If the
 +              expression was quantified, <code>return value != newNode</code>
 +     */
 +    NFANode * quantify(NFANode * newNode);
 +    /**
 +      Parses the current class being examined in
 +      <code>{@link pattern pattern}</code>.
 +      @return A string of unique characters contained in the current class being
 +              parsed
 +     */
 +    bkstring parseClass();
 +    /**
 +      Parses the current POSIX class being examined in
 +      <code>{@link pattern pattern}</code>.
 +      @return A string of unique characters representing the POSIX class being
 +              parsed
 +     */
 +    bkstring parsePosix();
 +    /**
 +      Returns a string containing the octal character being parsed
 +      @return The string contained the octal value being parsed
 +     */
 +    bkstring parseOctal();
 +    /**
 +      Returns a string containing the hex character being parsed
 +      @return The string contained the hex value being parsed
 +     */
 +    bkstring parseHex();
 +    /**
 +      Returns a new node representing the back reference being parsed
 +      @return The new node representing the back reference being parsed
 +     */
 +    NFANode *   parseBackref();
 +    /**
 +      Parses the escape sequence currently being examined. Determines if the
 +      escape sequence is a class, a single character, or the beginning of a
 +      quotation sequence.
 +      @param inv Output parameter. Whether or not to invert the returned class
 +      @param quo Output parameter. Whether or not this sequence starts a
 +                 quotation.
 +      @return The characters represented by the class
 +     */
 +    bkstring parseEscape(bool & inv, bool & quo);
 +    /**
 +      Parses a supposed registered pattern currently under compilation. If the
 +      sequence of characters does point to a registered pattern, then the
 +      registered pattern is appended to <code>*end<code>. The registered pattern
 +      is parsed with the current compilation flags.
 +      @param end The ending node of the thus-far compiled pattern
 +      @return The new end node of the current pattern
 +     */
 +    NFANode * parseRegisteredPattern(NFANode ** end);
 +    /**
 +      Parses a lookbehind expression. Appends the necessary nodes
 +      <code>*end</code>.
 +      @param pos Positive or negative look behind
 +      @param end The ending node of the current pattern
 +      @return The new end node of the current pattern
 +     */
 +    NFANode * parseBehind(const bool pos, NFANode ** end);
 +    /**
 +      Parses the current expression and tacks on nodes until a \E is found.
 +      @return The end of the current pattern
 +     */
 +    NFANode * parseQuote();
 +    /**
 +      Parses <code>{@link pattern pattern}</code>. This function is called
 +      recursively when an or (<code>|</code>) or a group is encountered.
 +      @param inParen Are we currently parsing inside a group
 +      @param inOr Are we currently parsing one side of an or (<code>|</code>)
 +      @param end The end of the current expression
 +      @return The starting node of the NFA constructed from this parse
 +     */
 +    NFANode * parse(const bool inParen = 0, const bool inOr = 0, NFANode ** end = NULL);
 +  public:
 +    /// We should match regardless of case
 +    const static unsigned long CASE_INSENSITIVE;
 +    /// We are implicitly quoted
 +    const static unsigned long LITERAL;
 +    /// @memo We should treat a <code><b>.</b></code> as [\x00-\x7F]
 +    const static unsigned long DOT_MATCHES_ALL;
 +    /** <code>^</code> and <code>$</code> should anchor to the beginning and
 +        ending of lines, not all input
 +     */
 +    const static unsigned long MULTILINE_MATCHING;
 +    /** When enabled, only instances of <code>\n</codes> are recognized as
 +        line terminators
 +     */
 +    const static unsigned long UNIX_LINE_MODE;
 +    /// The absolute minimum number of matches a quantifier can match (0)
 +    const static int MIN_QMATCH;
 +    /// The absolute maximum number of matches a quantifier can match (0x7FFFFFFF)
 +    const static int MAX_QMATCH;
 +  public:
 +    /**
 +      Call this function to compile a regular expression into a
 +      <code>Pattern</code> object. Special values can be assigned to
 +      <code>mode</code> when certain non-standard behaviors are expected from
 +      the <code>Pattern</code> object.
 +      @param pattern The regular expression to compile
 +      @param mode    A bitwise or of flags signalling what special behaviors are
 +                     wanted from this <code>Pattern</code> object
 +      @return If successful, <code>compile</code> returns a <code>Pattern</code>
 +              pointer. Upon failure, <code>compile</code> returns
 +              <code>NULL</code>
 +     */
 +    static Pattern                    * compile        (const bkstring & pattern,
 +                                                        const unsigned long mode = 0);
 +    /**
 +      Dont use this function. This function will compile a pattern, and cache
 +      the result. This will eventually be used as an optimization when people
 +      just want to call static methods using the same pattern over and over
 +      instead of first compiling the pattern and then using the compiled
 +      instance for matching.
 +      @param pattern The regular expression to compile
 +      @param mode    A bitwise or of flags signalling what special behaviors are
 +                     wanted from this <code>Pattern</code> object
 +      @return If successful, <code>compileAndKeep</code> returns a
 +              <code>Pattern</code> pointer. Upon failure, <code>compile</code>
 +              returns <code>NULL</code>.
 +     */
 +    static Pattern                    * compileAndKeep (const bkstring & pattern,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Searches through <code>replace</code> and replaces all substrings matched
 +      by <code>pattern</code> with <code>str</code>. <code>str</code> may
 +      contain backreferences (e.g. <code>\1</code>) to capture groups. A typical
 +      invocation looks like:
 +      <p>
 +      <code>
 +      Pattern::replace("(a+)b(c+)", "abcccbbabcbabc", "\\2b\\1");
 +      </code>
 +      <p>
 +      which would replace <code>abcccbbabcbabc</code> with
 +      <code>cccbabbcbabcba</code>.
 +      @param pattern          The regular expression
 +      @param str              The replacement text
 +      @param replacementText  The string in which to perform replacements
 +      @param mode             The special mode requested of the <code>Pattern</code>
 +                              during the replacement process
 +      @return The text with the replacement string substituted where necessary
 +     */
 +    static bkstring                  replace        (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const bkstring & replacementText,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Splits the specified string over occurrences of the specified pattern.
 +      Empty strings can be optionally ignored. The number of strings returned is
 +      configurable. A typical invocation looks like:
 +      <p>
 +      <code>
 +      bkstring str(strSize, '\0');<br>
 +      FILE * fp = fopen(fileName, "r");<br>
 +      fread((char*)str.data(), strSize, 1, fp);<br>
 +      fclose(fp);<br>
 +      <br>
 +      std::vector<bkstring> lines = Pattern::split("[\r\n]+", str, true);<br>
 +      <br>
 +      </code>
 +
 +      @param pattern    The regular expression
 +      @param replace    The string to split
 +      @param keepEmptys Whether or not to keep empty strings
 +      @param limit      The maximum number of splits to make
 +      @param mode       The special mode requested of the <code>Pattern</code>
 +                        during the split process
 +      @return All substrings of <code>str</code> split across <code>pattern</code>.
 +     */
 +    static std::vector<bkstring>     split          (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const bool keepEmptys = 0,
 +                                                        const unsigned long limit = 0,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Finds all the instances of the specified pattern within the string. You
 +      should be careful to only pass patterns with a minimum length of one. For
 +      example, the pattern <code>a*</code> can be matched by an empty string, so
 +      instead you should pass <code>a+</code> since at least one character must
 +      be matched. A typical invocation of <code>findAll</code> looks like:
 +      <p>
 +      <code>
 +      std::vector<td::string> numbers = Pattern::findAll("\\d+", string);
 +      </code>
 +      <p>
 +
 +      @param pattern  The pattern for which to search
 +      @param str      The string to search
 +      @param mode     The special mode requested of the <code>Pattern</code>
 +                      during the find process
 +      @return All instances of <code>pattern</code> in <code>str</code>
 +     */
 +    static std::vector<bkstring>     findAll        (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Determines if an entire string matches the specified pattern
 +
 +      @param pattern  The pattern for to match
 +      @param str      The string to match
 +      @param mode     The special mode requested of the <code>Pattern</code>
 +                      during the replacement process
 +      @return True if <code>str</code> is recognized by <code>pattern</code>
 +     */
 +    static bool                         matches        (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Registers a pattern under a specific name for use in later compilations.
 +      A typical invocation and later use looks like:
 +      <p>
 +      <code>
 +      Pattern::registerPattern("ip", "(?:\\d{1,3}\\.){3}\\d{1,3}");<br>
 +      Pattern * p1 = Pattern::compile("{ip}:\\d+");<br>
 +      Pattern * p2 = Pattern::compile("Connection from ({ip}) on port \\d+");<br>
 +      </code>
 +      <p>
 +      Multiple calls to <code>registerPattern</code> with the same
 +      <code>name</code> will result in the pattern getting overwritten.
 +
 +      @param name     The name to give to the pattern
 +      @param pattern  The pattern to register
 +      @param mode     Any special flags to use when compiling pattern
 +      @return Success/Failure. Fails only if <code>pattern</code> has invalid
 +              syntax
 +     */
 +    static bool                         registerPattern(const bkstring & name,
 +                                                        const bkstring & pattern,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Clears the pattern registry
 +      */
 +    static void                         unregisterPatterns();
 +    /**
 +      Don't use
 +     */
 +    static void                         clearPatternCache();
 +
 +    /**
 +      Searches through a string for the <code>n<sup>th</sup></code> match of the
 +      given pattern in the string. Match indeces start at zero, not one.
 +      A typical invocation looks like this:
 +      <p>
 +      <code>
 +      std::pair<bkstring, int> match = Pattern::findNthMatch("\\d{1,3}", "192.168.1.101:22", 1);<br>
 +      printf("%s %i\n", match.first.c_str(), match.second);<br>
 +      <br>
 +      Output: 168 4<br>
 +      <br>
 +
 +      @param pattern  The pattern for which to search
 +      @param str      The string to search
 +      @param matchNum Which match to find
 +      @param mode     Any special flags to use during the matching process
 +      @return A string and an integer. The string is the string matched. The
 +              integer is the starting location of the matched string in
 +              <code>str</code>. You can check for success/failure by making sure
 +              that the integer returned is greater than or equal to zero.
 +     */
 +    static std::pair<bkstring, int>  findNthMatch   (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const int matchNum,
 +                                                        const unsigned long mode = 0);
 +  public:
 +    /**
 +      Deletes all NFA nodes allocated during compilation
 +     */
 +    ~Pattern();
 +
 +    bkstring               replace        (const bkstring & str,
 +                                              const bkstring & replacementText);
 +    std::vector<bkstring>  split          (const bkstring & str, const bool keepEmptys = 0,
 +                                              const unsigned long limit = 0);
 +    std::vector<bkstring>  findAll        (const bkstring & str);
 +    bool                      matches        (const bkstring & str);
 +    /**
 +      Returns the flags used during compilation of this pattern
 +      @return The flags used during compilation of this pattern
 +     */
 +    unsigned long             getFlags       () const;
 +    /**
 +      Returns the regular expression this pattern represents
 +      @return The regular expression this pattern represents
 +     */
 +    bkstring               getPattern     () const;
 +    /**
 +      Creates a matcher object using the specified string and this pattern.
 +      @param str The string to match against
 +      @return A new matcher using object using this pattern and the specified
 +              string
 +     */
 +    Matcher                 * createMatcher  (const bkstring & str);
 +};
 +
 +class NFANode
 +{
 +  friend class Matcher;
 +  public:
 +    NFANode * next;
 +    NFANode();
 +    virtual ~NFANode();
 +    virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const = 0;
 +    inline virtual bool isGroupHeadNode()     const { return false; }
 +    inline virtual bool isStartOfInputNode()  const { return false; }
 +};
 +class NFACharNode : public NFANode
 +{
 +  protected:
 +    char ch;
 +  public:
 +    NFACharNode(const char c);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFACICharNode : public NFANode
 +{
 +  protected:
 +    char ch;
 +  public:
 +    NFACICharNode(const char c);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAStartNode : public NFANode
 +{
 +  public:
 +    NFAStartNode();
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAEndNode : public NFANode
 +{
 +  public:
 +    NFAEndNode();
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAQuantifierNode : public NFANode
 +{
 +  public:
 +    int min, max;
 +    NFANode * inner;
 +    virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
 +    NFAQuantifierNode(Pattern * pat, NFANode * internal,
 +                      const int minMatch = Pattern::MIN_QMATCH,
 +                      const int maxMatch = Pattern::MAX_QMATCH);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAGreedyQuantifierNode : public NFAQuantifierNode
 +{
 +  public:
 +    NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal,
 +                            const int minMatch = Pattern::MIN_QMATCH,
 +                            const int maxMatch = Pattern::MAX_QMATCH);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +    virtual int matchInternal(const bkstring & str, Matcher * matcher, const int curInd, const int soFar) const;
 +};
 +class NFALazyQuantifierNode : public NFAQuantifierNode
 +{
 +  public:
 +    NFALazyQuantifierNode(Pattern * pat, NFANode * internal,
 +                          const int minMatch = Pattern::MIN_QMATCH,
 +                          const int maxMatch = Pattern::MAX_QMATCH);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAPossessiveQuantifierNode : public NFAQuantifierNode
 +{
 +  public:
 +    NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal,
 +                                const int minMatch = Pattern::MIN_QMATCH,
 +                                const int maxMatch = Pattern::MAX_QMATCH);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAAcceptNode : public NFANode
 +{
 +  public:
 +    NFAAcceptNode();
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAClassNode : public NFANode
 +{
 +  public:
 +    bool inv;
 +    std::map<char, bool> vals;
 +    NFAClassNode(const bool invert = 0);
 +    NFAClassNode(const bkstring & clazz, const bool invert);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFACIClassNode : public NFANode
 +{
 +  public:
 +    bool inv;
 +    std::map<char, bool> vals;
 +    NFACIClassNode(const bool invert = 0);
 +    NFACIClassNode(const bkstring & clazz, const bool invert);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFASubStartNode : public NFANode
 +{
 +  public:
 +    NFASubStartNode();
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAOrNode : public NFANode
 +{
 +  public:
 +    NFANode * one;
 +    NFANode * two;
 +    NFAOrNode(NFANode * first, NFANode * second);
 +    virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAQuoteNode : public NFANode
 +{
 +  public:
 +    bkstring qStr;
 +    NFAQuoteNode(const bkstring & quoted);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFACIQuoteNode : public NFANode
 +{
 +  public:
 +    bkstring qStr;
 +    NFACIQuoteNode(const bkstring & quoted);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFALookAheadNode : public NFANode
 +{
 +  public:
 +    bool pos;
 +    NFANode * inner;
 +    NFALookAheadNode(NFANode * internal, const bool positive);
 +    virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFALookBehindNode : public NFANode
 +{
 +  public:
 +    bool pos;
 +    bkstring mStr;
 +    NFALookBehindNode(const bkstring & str, const bool positive);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAStartOfLineNode : public NFANode
 +{
 +  public:
 +    NFAStartOfLineNode();
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAEndOfLineNode : public NFANode
 +{
 +  public:
 +    NFAEndOfLineNode();
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAReferenceNode : public NFANode
 +{
 +  public:
 +    int gi;
 +    NFAReferenceNode(const int groupIndex);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAStartOfInputNode : public NFANode
 +{
 +  public:
 +    NFAStartOfInputNode();
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +    inline virtual bool isStartOfInputNode()  const { return true; }
 +};
 +class NFAEndOfInputNode : public NFANode
 +{
 +  public:
 +    bool term;
 +    NFAEndOfInputNode(const bool lookForTerm);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAWordBoundaryNode : public NFANode
 +{
 +  public:
 +    bool pos;
 +    NFAWordBoundaryNode(const bool positive);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAEndOfMatchNode : public NFANode
 +{
 +  public:
 +    NFAEndOfMatchNode();
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAGroupHeadNode : public NFANode
 +{
 +  public:
 +    int gi;
 +    NFAGroupHeadNode(const int groupIndex);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +    inline virtual bool isGroupHeadNode()     const { return true; }
 +};
 +class NFAGroupTailNode : public NFANode
 +{
 +  public:
 +    int gi;
 +    NFAGroupTailNode(const int groupIndex);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAGroupLoopPrologueNode : public NFANode
 +{
 +  public:
 +    int gi;
 +    NFAGroupLoopPrologueNode(const int groupIndex);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +class NFAGroupLoopNode : public NFANode
 +{
 +  public:
 +    int gi, min, max, type;
 +    NFANode * inner;
 +    NFAGroupLoopNode(NFANode * internal, const int minMatch,
 +                     const int maxMatch, const int groupIndex, const int matchType);
 +    virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
 +    virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +    int matchGreedy(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +    int matchLazy(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +    int matchPossessive(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
 +};
 +
 +#endif
 +
 diff --git a/plugins/SmileyAdd/src/regexp/WCMatcher.cpp b/plugins/SmileyAdd/src/regexp/WCMatcher.cpp new file mode 100644 index 0000000000..bed5a1944b --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/WCMatcher.cpp @@ -0,0 +1,181 @@ +#include "WCMatcher.h"
 +#include "WCPattern.h"
 +
 +const int WCMatcher::MATCH_ENTIRE_STRING = 0x01;
 +
 +/*
 +  Detailed documentation is provided in this class' header file
 +
 +  @author   Jeffery Stuart
 +  @since    November 2004
 +  @version  1.07.00
 +*/
 +
 +WCMatcher::WCMatcher(WCPattern * pattern, const bkstring & text)
 +{
 +  pat = pattern;
 +  str = &text;
 +  gc = pattern->groupCount;
 +  ncgc = -pattern->nonCapGroupCount;
 +  flags = 0;
 +  matchedSomething = false;
 +  starts        = new int[gc + ncgc];
 +  ends          = new int[gc + ncgc];
 +  groups        = new int[gc + ncgc];
 +  groupPos      = new int[gc + ncgc];
 +  groupIndeces  = new int[gc + ncgc];
 +  starts        = starts        + ncgc;
 +  ends          = ends          + ncgc;
 +  groups        = groups        + ncgc;
 +  groupPos      = groupPos      + ncgc;
 +  groupIndeces  = groupIndeces  + ncgc;
 +  for (int i = 0; i < gc; ++i) starts[i] = ends[i] = 0;
 +}
 +WCMatcher::~WCMatcher()
 +{
 +  delete [] (starts       - ncgc);
 +  delete [] (ends         - ncgc);
 +  delete [] (groups       - ncgc);
 +  delete [] (groupIndeces - ncgc);
 +  delete [] (groupPos     - ncgc);
 +}
 +void WCMatcher::clearGroups()
 +{
 +  int i;
 +  lm = 0;
 +  for (i = 0; i < gc; ++i)    groups[i] = starts[i] = ends[i] = -1;
 +  for (i = 1; i <= ncgc; ++i) groups[0 - i] = starts[0 - i] = ends[0 - i] = -1;
 +}
 +bkstring WCMatcher::replaceWithGroups(const bkstring & str)
 +{
 +  bkstring ret = L"";
 +
 +  bkstring t = str;
 +  while (t.size() > 0)
 +  {
 +    if (t[0] == (wchar_t)'\\')
 +    {
 +      t.erase(0, 1);
 +      if (t.size() == 0)
 +      {
 +        ret += L"\\";
 +      }
 +      else if (t[0] < (wchar_t)'0' || t[0] > (wchar_t)'9')
 +      {
 +        ret += t[0];
 +        t.erase(0, 1);
 +      }
 +      else
 +      {
 +        int gn = 0;
 +        while (t.size() > 0 && t[0] >= (wchar_t)'0' && t[0] <= (wchar_t)'9')
 +        {
 +          gn = gn * 10 + (t[0] - (wchar_t)'0');
 +          t.erase(0, 1);
 +        }
 +        ret += getGroup(gn);
 +      }
 +    }
 +    else
 +    {
 +      ret += t[0];
 +      t.erase(0, 1);
 +    }
 +  }
 +
 +  return ret;
 +}
 +unsigned long WCMatcher::getFlags() const
 +{
 +  return flags;
 +}
 +const bkstring& WCMatcher::getText() const
 +{
 +  return *str;
 +}
 +
 +bool WCMatcher::matches()
 +{
 +  flags = MATCH_ENTIRE_STRING;
 +  matchedSomething = false;
 +  clearGroups();
 +  lm = 0;
 +  return pat->head->match(*str, this, 0) == (int)str->size();
 +}
 +bool WCMatcher::findFirstMatch()
 +{
 +  starts[0] = 0;
 +  flags = 0;
 +  clearGroups();
 +  start = 0;
 +  lm = 0;
 +  ends[0] = pat->head->match(*str, this, 0);
 +  if (ends[0] >= 0)
 +  {
 +    matchedSomething = true;
 +    return 1;
 +  }
 +  return 0;
 +}
 +bool WCMatcher::findNextMatch()
 +{
 +  int s = starts[0], e = ends[0];
 +
 +  if (!matchedSomething) return findFirstMatch();
 +  if (s == e) ++e;
 +  flags = 0;
 +  clearGroups();
 +
 +  starts[0] = e;
 +  if (e >= (int)str->size()) return 0;
 +  start = e;
 +  lm = e;
 +  ends[0] = pat->head->match(*str, this, e);
 +  return ends[0] >= 0;
 +}
 +std::vector<bkstring> WCMatcher::findAll()
 +{
 +  std::vector<bkstring> ret;
 +  reset();
 +  while (findNextMatch())
 +  {
 +    ret.push_back(getGroup());
 +  }
 +  return ret;
 +}
 +
 +void WCMatcher::reset()
 +{
 +  lm = 0;
 +  clearGroups();
 +  matchedSomething = false;
 +}
 +
 +int WCMatcher::getStartingIndex(const int groupNum) const
 +{
 +  if (groupNum < 0 || groupNum >= gc) return -1;
 +  return starts[groupNum];
 +}
 +int WCMatcher::getEndingIndex(const int groupNum) const
 +{
 +  if (groupNum < 0 || groupNum >= gc) return -1;
 +  return ends[groupNum];
 +}
 +bkstring WCMatcher::getGroup(const int groupNum) const
 +{
 +  if (groupNum < 0 || groupNum >= gc) return L"";
 +  if (starts[groupNum] < 0 || ends[groupNum] < 0) return L"";
 +  return str->substr(starts[groupNum], ends[groupNum] - starts[groupNum]);
 +}
 +std::vector<bkstring> WCMatcher::getGroups(const bool includeGroupZero) const
 +{
 +  int i, start = (includeGroupZero ? 0 : 1);
 +  std::vector<bkstring> ret;
 +
 +  for (i = start; i < gc; ++i)
 +  {
 +    ret.push_back(getGroup(i));
 +  }
 +  return ret;
 +}
 +
 diff --git a/plugins/SmileyAdd/src/regexp/WCMatcher.h b/plugins/SmileyAdd/src/regexp/WCMatcher.h new file mode 100644 index 0000000000..23cc49e41f --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/WCMatcher.h @@ -0,0 +1,234 @@ +#ifndef __WCMATCHER_H__
 +#define __WCMATCHER_H__
 +
 +#include "bkstring.h"
 +#include <vector>
 +#include <WCPattern.h>
 +
 +/**
 +  A matcher is a non thread-safe object used to scan strings using a given
 +  {@link WCPattern WCPattern} object. Using a <code>WCMatcher</code> is the preferred
 +  method for scanning strings. WCMatchers are not thread-safe. WCMatchers require
 +  very little dynamic memory, hence one is encouraged to create several
 +  instances of a matcher when necessary as opposed to sharing a single instance
 +  of a matcher.
 +  <p>
 +  The most common methods needed by the matcher are <code>matches</code>,
 +  <code>findNextMatch</code>, and <code>getGroup</code>. <code>matches</code>
 +  and <code>findNextMatch</code> both return success or failure, and further
 +  details can be gathered from their documentation.
 +  <p>
 +  Unlike Java's <code>WCMatcher</code>, this class allows you to change the string
 +  you are matching against. This provides a small optimization, since you no
 +  longer need multiple matchers for a single pattern in a single thread.
 +  <p>
 +  This class also provides an extremely handy method for replacing text with
 +  captured data via the <code>replaceWithGroups</code> method. A typical
 +  invocation looks like:
 +  <pre>
 +  wchar_t buf[10000];
 +  bkstring str = "\\5 (user name \\1) uses \\7 for his/her shell and \\6 is their home directory";
 +  FILE * fp = fopen("/etc/passwd", "r");
 +  WCPattern::registerWCPattern("entry", "[^:]+");
 +  WCPattern * p = WCPattern::compile("^({entry}):({entry}):({entry}):({entry}):({entry}):({entry}):({entry})$",
 +                                 WCPattern::MULTILINE_MATCHING | WCPattern::UNIX_LINE_MODE);
 +  WCMatcher * m = p->createWCMatcher("");
 +  while (fgets(buf, 9999, fp))
 +  {
 +    m->setString(buf);
 +    if (m->matches())
 +    {
 +      printf("%s\n", m->replaceWithGroups(str).c_str());
 +    }
 +  }
 +  fclose(fp);
 +
 +  </pre>
 +  Calling any of the following functions before first calling
 +  <code>matches</code>, <code>findFirstMatch</code>, or
 +  <code>findNextMatch</code> results in undefined behavior and may cause your
 +  program to crash.
 +  <code>
 +  <ul>
 +    <li>replaceWithGroups</code>
 +    <li>getStartingIndex</li>
 +    <li>getEndingIndex</li>
 +    <li>getGroup</li>
 +    <li>getGroups</li>
 +  </ul>
 +  </code>
 +  <p>
 +  The function <code>findFirstMatch</code> will attempt to find the first match
 +  in the input string. The same results can be obtained by first calling
 +  <code>reset</code> followed by <code>findNextMatch</code>.
 +  <p>
 +  To eliminate the necessity of looping through a string to find all the
 +  matching substrings, <code>findAll</code> was created. The function will find
 +  all matching substrings and return them in a <code>vector</code>. If you need
 +  to examine specific capture groups within the substrings, then this method
 +  should not be used.
 +
 +  @author    Jeffery Stuart
 +  @since     March 2003, Stable Since November 2004
 +  @version   1.05.00
 +  @memo Mutable object used on instances of a WCPattern class
 + */
 +class WCMatcher
 +{
 +  friend class NFAUNode;
 +  friend class NFAStartUNode;
 +  friend class NFAEndUNode;
 +  friend class NFAGroupHeadUNode;
 +  friend class NFAGroupLoopUNode;
 +  friend class NFAGroupLoopPrologueUNode;
 +  friend class NFAGroupTailUNode;
 +  friend class NFALookBehindUNode;
 +  friend class NFAStartOfLineUNode;
 +  friend class NFAEndOfLineUNode;
 +  friend class NFAEndOfMatchUNode;
 +  friend class NFAReferenceUNode;
 +  friend class WCPattern;
 +  private:
 +    /**
 +      Creates a new matcher object against <code>text</code> using
 +      <code>pattern</code>.
 +
 +      @param pattern  The pattern with which to search
 +      @param text     The text in which to search
 +     */
 +    WCMatcher(WCPattern * pattern, const bkstring & text);
 +  protected:
 +    /// The pattern we use to match
 +    WCPattern * pat;
 +    /// The string in which we are matching
 +    const bkstring * str;
 +    /// The starting point of our match
 +    int start;
 +    /// An array of the starting positions for each group
 +    int * starts;
 +    /// An array of the ending positions for each group
 +    int * ends;
 +    /// An array of private data used by NFAUNodes during matching
 +    int * groups;
 +    /// An array of private data used by NFAUNodes during matching
 +    int * groupIndeces;
 +    /// An array of private data used by NFAUNodes during matching
 +    int * groupPos;
 +    /// The ending index of the last match
 +    int lm;
 +    /// The number of capturing groups we have
 +    int gc;
 +    /// The number of non-capturing groups we havew
 +    int ncgc;
 +    /// Whether or not we have matched something (used only by findFirstMatch and findNextMatch)
 +    int matchedSomething;
 +    /// The flags with which we were made
 +    unsigned long flags;
 +    /// Called by reset to clear the group arrays
 +    void clearGroups();
 +  public:
 +    /// Used internally by match to signify we want the entire string matched
 +    const static int MATCH_ENTIRE_STRING;
 +  public:
 +    /// Cleans up the dynamic memory used by this matcher
 +    ~WCMatcher();
 +    /**
 +      Replaces the contents of <code>str</code> with the appropriate captured
 +      text. <code>str</code> should have at least one back reference, otherwise
 +      this function does nothing.
 +      @param str The string in which to replace text
 +      @return A string with all backreferences appropriately replaced
 +     */
 +    bkstring replaceWithGroups(const bkstring & str);
 +    /**
 +      The flags currently being used by the matcher.
 +      @return Zero
 +     */
 +    unsigned long getFlags() const;
 +    /**
 +      The text being searched by the matcher.
 +      @return the text being searched by the matcher.
 +     */
 +    const bkstring& getText() const;
 +
 +    /**
 +      Scans the string from start to finish for a match. The entire string must
 +      match for this function to return success. Group variables are
 +      appropriately set and can be queried after this function returns.
 +
 +      @return Success if and only if the entire string matches the pattern
 +     */
 +    bool matches();
 +    /**
 +      Scans the string for the first substring matching the pattern. The entire
 +      string does not necessarily have to match for this function to return
 +      success. Group variables are appropriately set and can be queried after
 +      this function returns.
 +
 +      @return Success if any substring matches the specified pattern
 +     */
 +    bool findFirstMatch();
 +    /**
 +      Scans the string for the next substring matching the pattern. If no calls
 +      have been made to findFirstMatch of findNextMatch since the last call to
 +      reset, matches, or setString, then this function's behavior results to
 +      that of findFirstMatch.
 +
 +      @return Success if another substring can be found that matches the pattern
 +     */
 +    bool findNextMatch();
 +    /**
 +      Returns a vector of every substring in order which matches the given
 +      pattern.
 +
 +      @return Every substring in order which matches the given pattern
 +     */
 +    std::vector<bkstring> findAll();
 +    /**
 +      Resets the internal state of the matcher
 +     */
 +    void reset();
 +    /**
 +      Same as getText. Left n for backwards compatibilty with old source code
 +      @return Returns the string that is currently being used for matching
 +     */
 +    inline const bkstring&  getString()                           const { return *str; }
 +    /**
 +      Sets the string to scan
 +      @param newStr The string to scan for subsequent matches
 +     */
 +    inline void         setString(const bkstring & newStr)       { str = &newStr; reset(); }
 +
 +    /**
 +      Returns the starting index of the specified group.
 +      @param groupNum The group to query
 +      @return The starting index of the group if it was matched, -1 for an
 +              invalid group or if the group was not matched
 +     */
 +    int getStartingIndex(const int groupNum = 0) const;
 +    /**
 +      Returns the ending index of the specified group.
 +      @param groupNum The group to query
 +      @return The ending index of the group if it was matched, -1 for an
 +              invalid group or if the group was not matched
 +     */
 +    int getEndingIndex(const int groupNum = 0) const;
 +    /**
 +      Returns the specified group. An empty string ("") does not necessarily
 +      mean the group was not matched. A group such as (a*b?) could be matched by
 +      a zero length. If an empty string is returned, getStartingIndex can be
 +      called to determine if the group was actually matched.
 +      @param groupNum The group to query
 +      @return The text of the group
 +     */
 +    bkstring getGroup(const int groupNum = 0) const;
 +    /**
 +      Returns every capture group in a vector
 +
 +      @param includeGroupZero Whether or not include capture group zero
 +      @return Every capture group
 +     */
 +    std::vector<bkstring> getGroups(const bool includeGroupZero = 0) const;
 +};
 +
 +#endif
 diff --git a/plugins/SmileyAdd/src/regexp/WCPattern.cpp b/plugins/SmileyAdd/src/regexp/WCPattern.cpp new file mode 100644 index 0000000000..25f379f5e4 --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/WCPattern.cpp @@ -0,0 +1,1747 @@ +/**
 +  From the author (Jeff Stuart)
 +  "
 +  Let me start by saying this file is pretty big. If you feel up to it, you can
 +  try making changes yourself, but you would be better off to just email me at
 +  stuart@cs.ucdavis.edu if you think there is a bug, or have something useful you
 +  would like added. This project is very "near and dear" to me, so I am fairly quick
 +  to make bug fixes. The header files for WCPattern and WCMatcher are fairly well
 +  documented and the function names are pretty self-explanatory, but if you are having
 +  any trouble, feel free to email me at stuart@cs.ucdavis.edu.
 +
 +  If you email me, make sure you put something like C++RE in the subject because
 +  I tend to delete email if I don't recognize the name and the subject is
 +  something like "I Need Your Help" or "Got A Second" or "I Found It".
 +  "
 + */
 +
 +/*
 +  Detailed documentation is provided in this class' header file
 +
 +  @author   Jeffery Stuart
 +  @since    November 2004
 +  @version  1.07.00
 +*/
 +
 +#ifdef _WIN32
 +  #pragma warning(push)
 +  #pragma warning(disable:4996)
 +#endif
 +
 +#include <WCPattern.h>
 +#include <WCMatcher.h>
 +#include <wchar.h>
 +#include <algorithm>
 +#ifndef _WIN32
 +  #include <wctype.h>
 +#endif
 +
 +std::map<bkstring, WCPattern *> WCPattern::compiledWCPatterns;
 +std::map<bkstring, std::pair<bkstring, unsigned long> > WCPattern::registeredWCPatterns;
 +
 +const int WCPattern::MIN_QMATCH = 0x00000000;
 +const int WCPattern::MAX_QMATCH = 0x7FFFFFFF;
 +
 +const unsigned long WCPattern::CASE_INSENSITIVE       = 0x01;
 +const unsigned long WCPattern::LITERAL                = 0x02;
 +const unsigned long WCPattern::DOT_MATCHES_ALL        = 0x04;
 +const unsigned long WCPattern::MULTILINE_MATCHING     = 0x08;
 +const unsigned long WCPattern::UNIX_LINE_MODE         = 0x10;
 +
 +#define to_lower(a) (wchar_t)(UINT_PTR)CharLowerW((LPWSTR)(unsigned)a)
 +#define is_alpha IsCharAlphaW
 +
 +#if defined(_WIN32)
 +  #define str_icmp lstrcmpiW
 +#elif defined(__CYGWIN__) || defined(__APPLE__)
 +  #include <wctype.h>
 +  static inline int str_icmp(const wchar_t * a, const wchar_t * b)
 +  {
 +    while (*a && *b)
 +    {
 +      const int t = (int)towlower(*a) - (int)tolower(*b);
 +      if (t) return t;
 +      ++a; ++b;
 +    }
 +    if (*a)
 +    {
 +      if (*b) return (int)towlower(*a) - (int)tolower(*b);
 +      return 1;
 +    }
 +    else if (*b) return 1;
 +    return 0;
 +  }
 +#else
 +  #define str_icmp wcscasecmp
 +#endif
 +
 +WCPattern::WCPattern(const bkstring & rhs)
 +{
 +  matcher = NULL;
 +  pattern = rhs;
 +  curInd = 0;
 +  groupCount = 0;
 +  nonCapGroupCount = 0;
 +  error = 0;
 +  head = NULL;
 +}
 +// convenience function in case we want to add any extra debugging output
 +void WCPattern::raiseError()
 +{
 +/*  switch (pattern[curInd - 1])
 +  {
 +  case '*':
 +  case ')':
 +  case '+':
 +  case '?':
 +  case ']':
 +  case '}':
 +    fwprintf(stderr, L"%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
 +    fwprintf(stderr, L"Syntax Error near here. Possible unescaped meta character.\n");
 +    break;
 +  default:
 +    fwprintf(stderr, L"%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
 +    fwprintf(stderr, L"Syntax Error near here. \n");
 +    break;
 +  }*/
 +  error = 1;
 +}
 +NFAUNode * WCPattern::registerNode(NFAUNode * node)
 +{
 +  nodes[node] = 1;
 +  return node;
 +}
 +
 +bkstring WCPattern::classUnion      (bkstring s1, bkstring s2)  const
 +{
 +  wchar_t * out = new wchar_t[66000];
 +  std::sort(s1.begin(), s1.end());
 +  std::sort(s2.begin(), s2.end());
 +  wchar_t* p = std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), out); *p = 0;
 +  bkstring ret = out;
 +  delete [] out;
 +  return ret;
 +}
 +bkstring WCPattern::classIntersect  (bkstring s1, bkstring s2)  const
 +{
 +  wchar_t * out = new wchar_t[66000];
 +  std::sort(s1.begin(), s1.end());
 +  std::sort(s2.begin(), s2.end());
 +  *std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0;
 +  bkstring ret = out;
 +  delete [] out;
 +  return ret;
 +}
 +bkstring WCPattern::classNegate     (bkstring s1)                  const
 +{
 +  wchar_t * out = new wchar_t[66000];
 +  int i, ind = 0;
 +  std::map<wchar_t, bool> m;
 +
 +  for (i = 0; i < (int)s1.size(); ++i) m[s1[i]] = 1;
 +  for (i = 0xFF; i >= 0; --i) if (m.find((wchar_t)i) == m.end()) out[ind++] = (wchar_t)i;
 +  out[ind] = 0;
 +  bkstring ret(out, ind);
 +  delete [] out;
 +  return ret;
 +}
 +bkstring WCPattern::classCreateRange(wchar_t low, wchar_t hi)    const
 +{
 +  wchar_t out[300];
 +  int ind = 0;
 +  while (low != hi) out[ind++] = low++;
 +  out[ind++] = low;
 +  return bkstring(out, ind);
 +}
 +
 +int WCPattern::getInt(int start, int end)
 +{
 +  int ret = 0;
 +  for (; start <= end; ++start) ret = ret * 10 + (pattern[start] - (wchar_t)'0');
 +  return ret;
 +}
 +bool WCPattern::quantifyCurly(int & sNum, int & eNum)
 +{
 +  bool good = 1;
 +  int i, ci = curInd + 1;
 +  int commaInd = ci, endInd = ci, len = pattern.size();
 +  sNum = eNum = 0;
 +
 +  while (endInd   < len     && pattern[endInd  ] != (wchar_t)'}') ++endInd;
 +  while (commaInd < endInd  && pattern[commaInd] != (wchar_t)',') ++commaInd;
 +  if (endInd >= len) { raiseError(); return 0; }
 +  for (i = ci; good && i < endInd; ++i) if (i != commaInd && !isdigit(pattern[i])) good = 0;
 +  if (!good && commaInd < endInd) { raiseError(); return 0; }
 +  if (!good) return 0;
 +  /* so now everything in here is either a comma (and there is at most one comma) or a digit */
 +  if (commaInd == ci) // {,*}
 +  {
 +    if (endInd == commaInd + 1)    { sNum = MIN_QMATCH;               eNum = MAX_QMATCH;                        } // {,} = *
 +    else                           { sNum = MIN_QMATCH;               eNum = getInt(commaInd + 1, endInd - 1);  } // {,+}
 +  }
 +  else if (commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH;                        } // {+,}
 +  else if (commaInd == endInd)     { sNum = getInt(ci, endInd - 1);   eNum = sNum;                              } // {+}
 +  else                             { sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1);  } // {+,+}
 +  curInd = endInd + 1;
 +  return 1;
 +}
 +NFAUNode * WCPattern::quantifyGroup(NFAUNode * start, NFAUNode * stop, const int gn)
 +{
 +  NFAUNode * newNode = NULL;
 +  int type = 0;
 +
 +  if (curInd < (int)pattern.size())
 +  {
 +    wchar_t ch = (curInd + 1 >= (int)pattern.size()) ? USHRT_MAX : pattern[curInd + 1];
 +    switch (pattern[curInd])
 +    {
 +    case (wchar_t)'*':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case (wchar_t)'?': ++curInd; type = 1; break;
 +      case (wchar_t)'+': ++curInd; type = 2; break;
 +      }
 +      newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
 +      newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, MAX_QMATCH, gn, type));
 +      stop->next = newNode->next;
 +      return newNode;
 +    case (wchar_t)'?':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case (wchar_t)'?': ++curInd; type = 1; break;
 +      case (wchar_t)'+': ++curInd; type = 2; break;
 +      }
 +      newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
 +      newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, 1, gn, type));
 +      stop->next = newNode->next;
 +      return newNode;
 +    case (wchar_t)'+':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case (wchar_t)'?': ++curInd; type = 1; break;
 +      case (wchar_t)'+': ++curInd; type = 2; break;
 +      }
 +      newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
 +      newNode->next = registerNode(new NFAGroupLoopUNode(start, 1, MAX_QMATCH, gn, type));
 +      stop->next = newNode->next;
 +      return newNode;
 +    case (wchar_t)'{':
 +      {
 +        int s, e;
 +        if (quantifyCurly(s, e))
 +        {
 +          ch = (curInd < (int)pattern.size()) ? pattern[curInd] : USHRT_MAX;
 +          switch (ch)
 +          {
 +          case (wchar_t)'?': ++curInd; type = 1; break;
 +          case (wchar_t)'+': ++curInd; type = 2; break;
 +          }
 +          newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
 +          newNode->next = registerNode(new NFAGroupLoopUNode(start, s, e, gn, type));
 +          stop->next = newNode->next;
 +          return newNode;
 +        }
 +      }
 +    default:
 +      break;
 +    }
 +  }
 +  return NULL;
 +}
 +
 +NFAUNode * WCPattern::quantify(NFAUNode * newNode)
 +{
 +  if (curInd < (int)pattern.size())
 +  {
 +    wchar_t ch = (curInd + 1 >= (int)pattern.size()) ? USHRT_MAX : pattern[curInd + 1];
 +    switch (pattern[curInd])
 +    {
 +    case (wchar_t)'*':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case (wchar_t)'?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode      (this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
 +      case (wchar_t)'+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
 +      default:            newNode = registerNode(new NFAGreedyQuantifierUNode    (this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
 +      }
 +      break;
 +    case (wchar_t)'?':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case (wchar_t)'?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode      (this, newNode, MIN_QMATCH, 1)); break;
 +      case (wchar_t)'+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break;
 +      default:            newNode = registerNode(new NFAGreedyQuantifierUNode    (this, newNode, MIN_QMATCH, 1)); break;
 +      }
 +      break;
 +    case (wchar_t)'+':
 +      ++curInd;
 +      switch (ch)
 +      {
 +      case (wchar_t)'?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode      (this, newNode, 1, MAX_QMATCH)); break;
 +      case (wchar_t)'+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break;
 +      default:            newNode = registerNode(new NFAGreedyQuantifierUNode    (this, newNode, 1, MAX_QMATCH)); break;
 +      }
 +      break;
 +    case (wchar_t)'{':
 +      {
 +        int s, e;
 +        if (quantifyCurly(s, e))
 +        {
 +          ch = (curInd < (int)pattern.size()) ? pattern[curInd] : USHRT_MAX;
 +          switch (ch)
 +          {
 +          case (wchar_t)'?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode      (this, newNode, s, e)); break;
 +          case (wchar_t)'+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, s, e)); break;
 +          default:            newNode = registerNode(new NFAGreedyQuantifierUNode    (this, newNode, s, e)); break;
 +          }
 +        }
 +      }
 +      break;
 +    default:
 +      break;
 +    }
 +  }
 +  return newNode;
 +}
 +bkstring WCPattern::parseClass()
 +{
 +  bkstring t, ret = L"";
 +  wchar_t ch, c1, c2;
 +  bool inv = 0, neg = 0, quo = 0;
 +
 +  if (curInd < (int)pattern.size() && pattern[curInd] == (wchar_t)'^')
 +  {
 +    ++curInd;
 +    neg = 1;
 +  }
 +  while (curInd < (int)pattern.size() && pattern[curInd] != (wchar_t)']')
 +  {
 +    ch = pattern[curInd++];
 +    if (ch == (wchar_t)'[')
 +    {
 +      t = parseClass();
 +      ret = classUnion(ret, t);
 +    }
 +    /*else if (ch == (wchar_t)'-')
 +    {
 +      raiseError();
 +      curInd = pattern.size();
 +    }*/
 +    else if (ch == (wchar_t)'&' && curInd < (int)pattern.size() && pattern[curInd] == (wchar_t)'&')
 +    {
 +      if (pattern[++curInd] != (wchar_t)'[')
 +      {
 +        raiseError();
 +        curInd = pattern.size();
 +      }
 +      else
 +      {
 +        ++curInd;
 +        t = parseClass();
 +        ret = classIntersect(ret, t);
 +      }
 +    }
 +    else if (ch == (wchar_t)'\\')
 +    {
 +      t = parseEscape(inv, quo);
 +      if (quo)
 +      {
 +        raiseError();
 +        curInd = pattern.size();
 +      }
 +      else if (inv || t.size() > 1) // cant be part of a range (a-z)
 +      {
 +        if (inv) t = classNegate(t);
 +        ret = classUnion(ret, t);
 +      }
 +      else if (curInd < (int)pattern.size() && pattern[curInd] == (wchar_t)'-') // part of a range (a-z)
 +      {
 +        c1 = t[0];
 +        ++curInd;
 +        if (curInd >= (int)pattern.size()) raiseError();
 +        else
 +        {
 +          c2 = pattern[curInd++];
 +          if (c2 == (wchar_t)'\\')
 +          {
 +            t = parseEscape(inv, quo);
 +            if (quo)
 +            {
 +              raiseError();
 +              curInd = pattern.size();
 +            }
 +            else if (inv || t.size() > 1) raiseError();
 +            else ret = classUnion(ret, classCreateRange(c1, c2));
 +          }
 +          else if (c2 == (wchar_t)'[' || c2 == (wchar_t)']' || c2 == (wchar_t)'-' || c2 == (wchar_t)'&')
 +          {
 +            raiseError();
 +            curInd = pattern.size();
 +          }
 +          else ret = classUnion(ret, classCreateRange(c1, c2));
 +        }
 +      }
 +      else
 +      {
 +        ret = classUnion(ret, t);
 +      }
 +    }
 +    else if (curInd < (int)pattern.size() && pattern[curInd] == (wchar_t)'-')
 +    {
 +      c1 = ch;
 +      ++curInd;
 +      if (curInd >= (int)pattern.size()) raiseError();
 +      else
 +      {
 +        c2 = pattern[curInd++];
 +        if (c2 == (wchar_t)'\\')
 +        {
 +          t = parseEscape(inv, quo);
 +          if (quo)
 +          {
 +            raiseError();
 +            curInd = pattern.size();
 +          }
 +          else if (inv || t.size() > 1) raiseError();
 +          else ret = classUnion(ret, classCreateRange(c1, c2));
 +        }
 +        else if (c2 == (wchar_t)'[' || c2 == (wchar_t)']' || c2 == (wchar_t)'-' || c2 == (wchar_t)'&')
 +        {
 +          raiseError();
 +          curInd = pattern.size();
 +        }
 +        else
 +        {
 +          ret = classUnion(ret, classCreateRange(c1, c2));
 +        }
 +      }
 +    }
 +    else
 +    {
 +      ret += L" ";
 +      ret[ret.size() - 1] = ch;
 +    }
 +  }
 +  if (curInd >= (int)pattern.size() || pattern[curInd] != (wchar_t)']')
 +  {
 +    raiseError();
 +    ret = L"";
 +  }
 +  else
 +  {
 +    ++curInd;
 +    if (neg) ret = classNegate(ret);
 +  }
 +  return ret;
 +}
 +bkstring WCPattern::parsePosix()
 +{
 +  bkstring s7 = pattern.substr(curInd, 7);
 +  if (s7 == L"{Lower}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyz";                                                                       }
 +  if (s7 == L"{Upper}") { curInd += 7; return L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";                                                                       }
 +  if (s7 == L"{Alpha}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";                                             }
 +  if (s7 == L"{Digit}") { curInd += 7; return L"0123456789";                                                                                       }
 +  if (s7 == L"{Alnum}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";                                   }
 +  if (s7 == L"{Punct}") { curInd += 7; return L"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";                                                               }
 +  if (s7 == L"{Graph}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
 +  if (s7 == L"{Print}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
 +  if (s7 == L"{Blank}") { curInd += 7; return L" \t";                                                                                              }
 +  if (s7 == L"{Space}") { curInd += 7; return L" \t\n\x0B\f\r";                                                                                    }
 +  if (s7 == L"{Cntrl}")
 +  {
 +	bkstring::value_type i;
 +    bkstring s = L" ";
 +
 +    for (i = 0; i < 5; ++i) s += s;
 +    s += L" ";
 +    for (i = 0; i <= 0x1F; ++i) s[i] = i;
 +    s[0x20] = 0x7F;
 +    curInd += 7;
 +    return s;
 +  }
 +  if (s7 == L"{ASCII}")
 +  {
 +    bkstring s(0x80, (wchar_t)' ');
 +    for (bkstring::value_type i = 0; i <= 0x7f; ++i) s[i] = i;
 +    curInd += 7;
 +    return s;
 +  }
 +  if (pattern.substr(curInd, 8) == L"{XDigit}") { curInd += 8; return L"abcdefABCDEF0123456789"; }
 +  raiseError();
 +  return L"";
 +}
 +NFAUNode * WCPattern::parseBackref()
 +{
 +  #define is_dig(x) ((x) >= (wchar_t)'0' && (x) <= (wchar_t)'9')
 +  #define to_int(x) ((x) - (wchar_t)'0')
 +  int ci = curInd;
 +  int oldRef = 0, ref = 0;
 +
 +  while (ci < (int)pattern.size() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount))
 +  {
 +    oldRef = ref;
 +    ref = ref * 10 + to_int(pattern[ci++]);
 +  }
 +  if (ci == (int)pattern.size())
 +  {
 +    oldRef = ref;
 +    ++ci;
 +  }
 +  if (oldRef < 0 || ci <= curInd)
 +  {
 +    raiseError();
 +    return registerNode(new NFAReferenceUNode(-1));
 +  }
 +  curInd = ci;
 +  return registerNode(new NFAReferenceUNode(ref));
 +
 +  #undef is_dig
 +  #undef to_int
 +}
 +bkstring WCPattern::parseOctal()
 +{
 +  #define islowoc(x)  ((x) >= (wchar_t)'0' && (x) <= (wchar_t)'3')
 +  #define isoc(x)     ((x) >= (wchar_t)'0' && (x) <= (wchar_t)'7')
 +  #define fromoc(x)   ((x) - (wchar_t)'0')
 +  int ci = curInd;
 +  wchar_t ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : USHRT_MAX;
 +  wchar_t ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : USHRT_MAX;
 +  wchar_t ch3 = (ci + 2 < (int)pattern.size()) ? pattern[ci + 2] : USHRT_MAX;
 +  bkstring s = L" ";
 +
 +  if (islowoc(ch1) && isoc(ch2))
 +  {
 +    curInd += 2;
 +    s[0] = fromoc(ch1) * 8 + fromoc(ch2);
 +    if (isoc(ch3))
 +    {
 +      ++curInd;
 +      s[0] = s[0] * 8 + fromoc(ch3);
 +    }
 +  }
 +  else if (isoc(ch1) && isoc(ch2))
 +  {
 +    curInd += 2;
 +    s[0] = fromoc(ch1) * 8 + fromoc(ch2);
 +  }
 +  else raiseError();
 +
 +  return s;
 +  #undef islowoc
 +  #undef isoc
 +  #undef fromoc
 +}
 +bkstring WCPattern::parseHex()
 +{
 +  #define to_low(x)   (((x) >= (wchar_t)'A' && (x) <= (wchar_t)'Z') ? ((x) - (wchar_t)'A' + (wchar_t)'a') : (x))
 +  #define is_dig(x)   ((x) >= (wchar_t)'0' && (x) <= (wchar_t)'9')
 +  #define is_hex(x)   (is_dig(x) || (to_low(x) >= (wchar_t)'a' && to_low(x) <= (wchar_t)'f'))
 +  #define to_int(x)   ((is_dig(x)) ? ((x) - (wchar_t)'0') : (to_low(x) - (wchar_t)'a' + 10))
 +
 +  int ci = curInd;
 +  wchar_t ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : USHRT_MAX;
 +  wchar_t ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : USHRT_MAX;
 +  wchar_t ch3 = (ci + 2 < (int)pattern.size()) ? pattern[ci + 2] : USHRT_MAX;
 +  wchar_t ch4 = (ci + 3 < (int)pattern.size()) ? pattern[ci + 3] : USHRT_MAX;
 +  bkstring s = L" ";
 +
 +  if (is_hex(ch1) && is_hex(ch2) && is_hex(ch3) && is_hex(ch4))
 +  {
 +    curInd += 2;
 +    s[0] = (to_int(ch1) << 12 & 0xF000) | (to_int(ch2) << 8 & 0x0F00) |
 +           (to_int(ch3) <<  4 & 0x0F00) | (to_int(ch4)      & 0x000F);
 +  }
 +  else if (is_hex(ch1) && is_hex(ch2))
 +  {
 +    curInd += 2;
 +    s[0] = (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F);
 +  }
 +
 +  return s;
 +  #undef to_low
 +  #undef is_dig
 +  #undef is_hex
 +  #undef to_int
 +}
 +bkstring WCPattern::parseEscape(bool & inv, bool & quo)
 +{
 +  wchar_t ch = pattern[curInd++];
 +  bkstring classes = L"";
 +
 +  if (curInd > (int)pattern.size())
 +  {
 +    raiseError();
 +    return NULL;
 +  }
 +
 +  quo = 0;
 +  inv = 0;
 +  switch (ch)
 +  {
 +  case (wchar_t)'p': classes = parsePosix();                                                         break;
 +  case (wchar_t)'P': classes = L"!!"; classes += parsePosix();                                        break;
 +  case (wchar_t)'d': classes = L"0123456789";                                                         break;
 +  case (wchar_t)'D': classes = L"!!0123456789";                                                       break;
 +  case (wchar_t)'s': classes = L" \t\r\n\f";                                                          break;
 +  case (wchar_t)'S': classes = L"!! \t\r\n\f";                                                        break;
 +  case (wchar_t)'w': classes = L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";    break;
 +  case (wchar_t)'W': classes = L"!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";  break;
 +  case (wchar_t)'0': classes = parseOctal(); break;
 +  case (wchar_t)'x': classes = parseHex();   break;
 +
 +  case (wchar_t)'Q': quo = 1;        break;
 +  case (wchar_t)'t': classes = L"\t"; break;
 +  case (wchar_t)'r': classes = L"\r"; break;
 +  case (wchar_t)'n': classes = L"\n"; break;
 +  case (wchar_t)'f': classes = L"\f"; break;
 +  case (wchar_t)'a': classes = L"\a"; break;
 +  case (wchar_t)'e': classes = L"\r"; break;
 +  default:  classes = L" "; classes[0] = ch; break;
 +  }
 +  if (classes.substr(0, 2) == L"!!")
 +  {
 +    classes = classes.substr(2);
 +    inv = 1;
 +  }
 +  return classes;
 +}
 +NFAUNode * WCPattern::parseRegisteredWCPattern(NFAUNode ** end)
 +{
 +  int i, j;
 +  bkstring s;
 +  NFAUNode * ret = NULL;
 +  for (i = curInd; i < (int)pattern.size() && pattern[i] != (wchar_t)'}'; ++i) { }
 +  if (pattern[i] != (wchar_t)'}') { raiseError(); return NULL; }
 +  if (i == curInd + 1)   { raiseError(); return NULL; } // {}
 +  if (
 +      !(
 +        (pattern[curInd] >= (wchar_t)'a' && pattern[curInd] <= (wchar_t)'z') ||
 +        (pattern[curInd] >= (wchar_t)'A' && pattern[curInd] <= (wchar_t)'Z') ||
 +        (pattern[curInd] == (wchar_t)'_')
 +       )
 +     )
 +  {
 +    raiseError();
 +    return NULL;
 +  }
 +  for (j = curInd; !error && j < i; ++j)
 +  {
 +    if (
 +        !(
 +          (pattern[j] >= (wchar_t)'a' && pattern[j] <= (wchar_t)'z') ||
 +          (pattern[j] >= (wchar_t)'A' && pattern[j] <= (wchar_t)'Z') ||
 +          (pattern[j] >= (wchar_t)'0' && pattern[j] <= (wchar_t)'9') ||
 +          (pattern[j] == (wchar_t)'_')
 +         )
 +        )
 +    {
 +      raiseError();
 +      return NULL;
 +    }
 +  }
 +  s = pattern.substr(curInd, i - curInd);
 +  if (registeredWCPatterns.find(s) == registeredWCPatterns.end()) raiseError();
 +  else
 +  {
 +    unsigned long oflags = flags;
 +    bkstring op = pattern;
 +    int ci = i + 1;
 +
 +    pattern = registeredWCPatterns[s].first;
 +    curInd = 0;
 +    flags = registeredWCPatterns[s].second;
 +
 +    --groupCount;
 +    ret = parse(0, 0, end);
 +
 +    pattern = op;
 +    curInd = ci;
 +    flags = oflags;
 +  }
 +  if (error) { *end = ret = NULL; }
 +  return ret;
 +}
 +
 +// look behind should interpret everything as a literal (except \\) since the
 +// pattern must have a concrete length
 +NFAUNode * WCPattern::parseBehind(const bool pos, NFAUNode ** end)
 +{
 +  bkstring t = L"";
 +  while (curInd < (int)pattern.size() && pattern[curInd] != (wchar_t)')')
 +  {
 +    wchar_t ch = pattern[curInd++];
 +    t += L" ";
 +    if (ch == (wchar_t)'\\')
 +    {
 +      if (curInd + 1 >= (int)pattern.size())
 +      {
 +        raiseError();
 +        return *end = registerNode(new NFACharUNode((wchar_t)' '));
 +      }
 +      ch = pattern[curInd++];
 +    }
 +    t[t.size() - 1] = ch;
 +  }
 +  if (curInd >= (int)pattern.size() || pattern[curInd] != (wchar_t)')') raiseError();
 +  else ++curInd;
 +  return *end = registerNode(new NFALookBehindUNode(t, pos));
 +}
 +NFAUNode * WCPattern::parseQuote()
 +{
 +  bool done = 0;
 +  bkstring s = L"";
 +
 +  while (!done)
 +  {
 +    if      (curInd >= (int)pattern.size())
 +    {
 +      raiseError();
 +      done = 1;
 +    }
 +    else if (pattern.substr(curInd, 2) == L"\\E")
 +    {
 +      curInd += 2;
 +      done = 1;
 +    }
 +    else if (pattern[curInd] == (wchar_t)'\\')
 +    {
 +      s += L" ";
 +      s[s.size() - 1] = pattern[++curInd];
 +      ++curInd;
 +    }
 +    else
 +    {
 +      s += L" ";
 +      s[s.size() - 1] = pattern[curInd++];
 +    }
 +  }
 +  if ((flags & WCPattern::CASE_INSENSITIVE) != 0) return registerNode(new NFACIQuoteUNode(s));
 +  return registerNode(new NFAQuoteUNode(s));
 +}
 +NFAUNode * WCPattern::parse(const bool inParen, const bool inOr, NFAUNode ** end)
 +{
 +  NFAUNode * start, * cur, * next = NULL;
 +  bkstring t;
 +  int grc = groupCount++;
 +  bool inv, quo;
 +  bool ahead = 0, pos = 0, noncap = 0, indep = 0;
 +  unsigned long oldFlags = flags;
 +
 +  if (inParen)
 +  {
 +    if (pattern[curInd] == (wchar_t)'?')
 +    {
 +      ++curInd;
 +      --groupCount;
 +      if      (pattern[curInd]           == (wchar_t)':')   { noncap = 1; ++curInd;     grc = --nonCapGroupCount; }
 +      else if (pattern[curInd]           == (wchar_t)'=')   { ++curInd;     ahead = 1;  pos = 1;                  }
 +      else if (pattern[curInd]           == (wchar_t)'!')   { ++curInd;     ahead = 1;  pos = 0;                  }
 +      else if (pattern.substr(curInd, 2) == L"<=")  { curInd += 2;  return parseBehind(1, end);                   }
 +      else if (pattern.substr(curInd, 2) == L"<!")  { curInd += 2;  return parseBehind(0, end);                   }
 +      else if (pattern[curInd]           == (wchar_t)'>')   { ++curInd;     indep = 1;                            }
 +      else
 +      {
 +        bool negate = false, done = false;
 +        while (!done)
 +        {
 +          if (curInd >= (int)pattern.size())
 +          {
 +            raiseError();
 +            return NULL;
 +          }
 +          else if (negate)
 +          {
 +            switch (pattern[curInd])
 +            {
 +            case (wchar_t)'i': flags &= ~WCPattern::CASE_INSENSITIVE;   break;
 +            case (wchar_t)'d': flags &= ~WCPattern::UNIX_LINE_MODE;     break;
 +            case (wchar_t)'m': flags &= ~WCPattern::MULTILINE_MATCHING; break;
 +            case (wchar_t)'s': flags &= ~WCPattern::DOT_MATCHES_ALL;    break;
 +            case (wchar_t)':': done = true;                             break;
 +            case (wchar_t)')':
 +              ++curInd;
 +              *end = registerNode(new NFALookBehindUNode(L"", true));
 +              return *end;
 +            case (wchar_t)'-':
 +            default: raiseError(); return NULL;
 +            }
 +          }
 +          else
 +          {
 +            switch (pattern[curInd])
 +            {
 +            case (wchar_t)'i': flags |= WCPattern::CASE_INSENSITIVE;    break;
 +            case (wchar_t)'d': flags |= WCPattern::UNIX_LINE_MODE;      break;
 +            case (wchar_t)'m': flags |= WCPattern::MULTILINE_MATCHING;  break;
 +            case (wchar_t)'s': flags |= WCPattern::DOT_MATCHES_ALL;     break;
 +            case (wchar_t)':': done = true;                             break;
 +            case (wchar_t)'-': negate = true;                           break;
 +            case (wchar_t)')':
 +              ++curInd;
 +              *end = registerNode(new NFALookBehindUNode(L"", true));
 +              return *end;
 +            default:  raiseError(); return NULL;
 +            }
 +          }
 +          ++curInd;
 +        }
 +        noncap = 1;
 +        grc = --nonCapGroupCount;
 +      }
 +
 +      if (noncap) cur = start = registerNode(new NFAGroupHeadUNode(grc));
 +      else        cur = start = registerNode(new NFASubStartUNode);
 +    }
 +    else cur = start = registerNode(new NFAGroupHeadUNode(grc));
 +  }
 +  else cur = start = registerNode(new NFASubStartUNode);
 +  while (curInd < (int)pattern.size())
 +  {
 +    wchar_t ch = pattern[curInd++];
 +
 +    next = NULL;
 +    if (error) return NULL;
 +    switch (ch)
 +    {
 +    case (wchar_t)'^':
 +      if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineUNode);
 +      else                                            next = registerNode(new NFAStartOfInputUNode);
 +      break;
 +    case (wchar_t)'$':
 +      if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineUNode);
 +      else                                            next = registerNode(new NFAEndOfInputUNode(0));
 +      break;
 +    case (wchar_t)'|':
 +      --groupCount;
 +      cur->next = registerNode(new NFAAcceptUNode);
 +      cur = start = registerNode(new NFAOrUNode(start, parse(inParen, 1)));
 +      break;
 +    case (wchar_t)'\\':
 +      if      (curInd < (int)pattern.size())
 +      {
 +        bool eoi = 0;
 +        switch (pattern[curInd])
 +        {
 +        case (wchar_t)'1':
 +        case (wchar_t)'2':
 +        case (wchar_t)'3':
 +        case (wchar_t)'4':
 +        case (wchar_t)'5':
 +        case (wchar_t)'6':
 +        case (wchar_t)'7':
 +        case (wchar_t)'8':
 +        case (wchar_t)'9': next = parseBackref(); break;
 +        case (wchar_t)'A': ++curInd; next = registerNode(new NFAStartOfInputUNode);     break;
 +        case (wchar_t)'B': ++curInd; next = registerNode(new NFAWordBoundaryUNode(0));  break;
 +        case (wchar_t)'b': ++curInd; next = registerNode(new NFAWordBoundaryUNode(1));  break;
 +        case (wchar_t)'G': ++curInd; next = registerNode(new NFAEndOfMatchUNode);       break;
 +        case (wchar_t)'Z': eoi = 1;
 +        case (wchar_t)'z': ++curInd; next = registerNode(new NFAEndOfInputUNode(eoi));  break;
 +        default:
 +          t = parseEscape(inv, quo);
 +          //printf("inv quo classes { %c %c %s }\n", inv ? (wchar_t)'t' : (wchar_t)'f', quo ? (wchar_t)'t' : (wchar_t)'f', t.c_str());
 +          if (!quo)
 +          {
 +            if (t.size() > 1 || inv)
 +            {
 +              if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassUNode(t, inv));
 +              else                                            next = registerNode(new NFAClassUNode(t, inv));
 +            }
 +            else
 +            {
 +              next = registerNode(new NFACharUNode(t[0]));
 +            }
 +          }
 +          else
 +          {
 +            next = parseQuote();
 +          }
 +        }
 +      }
 +      else raiseError();
 +      break;
 +    case (wchar_t)'[':
 +      if ((flags & WCPattern::CASE_INSENSITIVE) == 0)
 +      {
 +        NFAClassUNode * clazz = new NFAClassUNode();
 +        bkstring s = parseClass();
 +        for (int i = 0; i < (int)s.size(); ++i) clazz->vals[s[i]] = 1;
 +        next = registerNode(clazz);
 +      }
 +      else
 +      {
 +        NFACIClassUNode * clazz = new NFACIClassUNode();
 +        bkstring s = parseClass();
 +        for (int i = 0; i < (int)s.size(); ++i) clazz->vals[to_lower(s[i])] = 1;
 +        next = registerNode(clazz);
 +      }
 +      break;
 +    case (wchar_t)'.':
 +      {
 +        bool useN = 1, useR = 1;
 +        NFAClassUNode * clazz = new NFAClassUNode(1);
 +        if ((flags & WCPattern::UNIX_LINE_MODE)  != 0) useR = 0;
 +        if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
 +        if (useN) clazz->vals[(wchar_t)'\n'] = 1;
 +        if (useR) clazz->vals[(wchar_t)'\r'] = 1;
 +        next = registerNode(clazz);
 +      }
 +      break;
 +    case (wchar_t)'(':
 +      {
 +        NFAUNode * end, * t1, * t2;
 +        t1 = parse(1, 0, &end);
 +        if (!t1) raiseError();
 +        else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL)
 +        {
 +          cur->next = t2;
 +          cur = t2->next;
 +        }
 +        else
 +        {
 +          cur->next = t1;
 +          cur = end;
 +        }
 +      }
 +      break;
 +    case (wchar_t)')':
 +      if (!inParen) raiseError();
 +      else if (inOr)
 +      {
 +        --curInd;
 +        cur = cur->next = registerNode(new NFAAcceptUNode);
 +        flags = oldFlags;
 +        return start;
 +      }
 +      else
 +      {
 +        if (ahead)
 +        {
 +          cur = cur->next = registerNode(new NFAAcceptUNode);
 +          flags = oldFlags;
 +          return *end = registerNode(new NFALookAheadUNode(start, pos));
 +        }
 +        else if (indep)
 +        {
 +          cur = cur->next = registerNode(new NFAAcceptUNode);
 +          flags = oldFlags;
 +          return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1));
 +        }
 +        else // capping or noncapping, it doesnt matter
 +        {
 +          *end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc));
 +          next = quantifyGroup(start, *end, grc);
 +          if (next)
 +          {
 +            start = next;
 +            *end = next->next;
 +          }
 +          flags = oldFlags;
 +          return start;
 +        }
 +      }
 +      break;
 +    case (wchar_t)'{': // registered pattern
 +      cur->next = parseRegisteredWCPattern(&next);
 +      if (cur->next) cur = next;
 +      break;
 +    case (wchar_t)'*':
 +    case (wchar_t)'+':
 +    case (wchar_t)'?':
 +//    case (wchar_t)'}':
 +//    case (wchar_t)']':
 +      raiseError();
 +      break;
 +    default:
 +      if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch));
 +      else                                          next = registerNode(new NFACharUNode(ch));
 +      break;
 +    }
 +    if (next) cur = cur->next = quantify(next);
 +  }
 +  if (inParen) raiseError();
 +  else
 +  {
 +    if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode);
 +    if (end) *end = cur;
 +  }
 +
 +  flags = oldFlags;
 +  if (error) return NULL;
 +
 +  return start;
 +}
 +
 +WCPattern * WCPattern::compile(const bkstring & pattern, const unsigned long mode)
 +{
 +  WCPattern * p = new WCPattern(pattern);
 +  NFAUNode * end;
 +
 +  p->flags = mode;
 +  if ((mode & WCPattern::LITERAL) != 0)
 +  {
 +    p->head = p->registerNode(new NFAStartUNode);
 +    if ((mode & WCPattern::CASE_INSENSITIVE) != 0)  p->head->next = p->registerNode(new NFACIQuoteUNode(pattern));
 +    else                                          p->head->next = p->registerNode(new NFAQuoteUNode(pattern));
 +    p->head->next->next = p->registerNode(new NFAEndUNode);
 +  }
 +  else
 +  {
 +    p->head = p->parse(0, 0, &end);
 +    if (!p->head)
 +    {
 +      delete p;
 +      p = NULL;
 +    }
 +    else
 +    {
 +      if (!(p->head && p->head->isStartOfInputNode()))
 +      {
 +        NFAUNode * n = p->registerNode(new NFAStartUNode);
 +        n->next = p->head;
 +        p->head = n;
 +      }
 +      end->next = p->registerNode(new NFAEndUNode);
 +    }
 +  }
 +  if (p != NULL)
 +  {
 +    p->matcher = new WCMatcher(p, L"");
 +  }
 +
 +  return p;
 +}
 +
 +WCPattern * WCPattern::compileAndKeep(const bkstring & pattern, const unsigned long mode)
 +{
 +  WCPattern * ret = NULL;
 +  std::map<bkstring, WCPattern*>::iterator it = compiledWCPatterns.find(pattern);
 +
 +  if (it != compiledWCPatterns.end())
 +  {
 +    ret = it->second;
 +  }
 +  else
 +  {
 +    ret = compile(pattern, mode);
 +    compiledWCPatterns[pattern] = ret;
 +  }
 +
 +  return ret;
 +}
 +bkstring WCPattern::replace(const bkstring & pattern, const bkstring & str,
 +                                     const bkstring & replacementText, const unsigned long mode)
 +{
 +  bkstring ret;
 +  WCPattern * p = WCPattern::compile(pattern, mode);
 +  if (p)
 +  {
 +    ret = p->replace(str, replacementText);
 +    delete p;
 +  }
 +  return ret;
 +}
 +
 +std::vector<bkstring> WCPattern::split(const bkstring & pattern, const bkstring & str, const bool keepEmptys,
 +                              const unsigned long limit, const unsigned long mode)
 +{
 +  std::vector<bkstring> ret;
 +  WCPattern * p = WCPattern::compile(pattern, mode);
 +  if (p)
 +  {
 +    ret = p->split(str, keepEmptys, limit);
 +    delete p;
 +  }
 +  return ret;
 +}
 +
 +std::vector<bkstring> WCPattern::findAll(const bkstring & pattern, const bkstring & str, const unsigned long mode)
 +{
 +  std::vector<bkstring> ret;
 +  WCPattern * p = WCPattern::compile(pattern, mode);
 +  if (p)
 +  {
 +    ret = p->findAll(str);
 +    delete p;
 +  }
 +  return ret;
 +}
 +
 +bool WCPattern::matches(const bkstring & pattern, const bkstring & str, const unsigned long mode)
 +{
 +  bool ret = 0;
 +  WCPattern * p = compile(pattern, mode);
 +
 +  if (p)
 +  {
 +    ret = p->matches(str);
 +    delete p;
 +  }
 +
 +  return ret;
 +}
 +
 +bool WCPattern::registerWCPattern(const bkstring & name, const bkstring & pattern, const unsigned long mode)
 +{
 +  WCPattern * p = WCPattern::compile(pattern, mode);
 +  if (!p) return 0;
 +  WCPattern::registeredWCPatterns[name] = std::make_pair(pattern, mode);
 +  delete p;
 +  return 1;
 +}
 +
 +void WCPattern::unregisterWCPatterns()
 +{
 +  registeredWCPatterns.clear();
 +}
 +void WCPattern::clearWCPatternCache()
 +{
 +  std::map<bkstring, WCPattern*>::iterator it;
 +  for (it = compiledWCPatterns.begin(); it != compiledWCPatterns.end(); ++it)
 +  {
 +    delete it->second;
 +  }
 +  compiledWCPatterns.clear();
 +}
 +
 +std::pair<bkstring, int>  WCPattern::findNthMatch(const bkstring & pattern, const bkstring & str,
 +                                         const int matchNum, const unsigned long mode)
 +{
 +  std::pair<bkstring, int> ret;
 +  WCPattern * p = WCPattern::compile(pattern, mode);
 +
 +  ret.second = -1;
 +  if (p)
 +  {
 +    int i = -1;
 +    p->matcher->setString(str);
 +    while (i < matchNum && p->matcher->findNextMatch()) { ++i; }
 +    if (i == matchNum && p->matcher->getStartingIndex() >= 0)
 +    {
 +      ret.first = p->matcher->getGroup(0);
 +      ret.second = p->matcher->getStartingIndex();
 +    }
 +    delete p;
 +  }
 +
 +  return ret;
 +}
 +
 +WCPattern::~WCPattern()
 +{
 +  /*
 +  nodes.clear();
 +  if (head) head->findAllNodes(nodes);
 +  */
 +  if (matcher) delete matcher;
 +  for (std::map<NFAUNode*, bool>::iterator it = nodes.begin(); it != nodes.end(); ++it) delete it->first;
 +}
 +bkstring WCPattern::replace(const bkstring & str, const bkstring & replacementText)
 +{
 +  int li = 0;
 +  bkstring ret = L"";
 +
 +  matcher->setString(str);
 +  while (matcher->findNextMatch())
 +  {
 +    ret += str.substr(li, matcher->getStartingIndex() - li);
 +    ret += matcher->replaceWithGroups(replacementText);
 +    li = matcher->getEndingIndex();
 +  }
 +  ret += str.substr(li);
 +
 +  return ret;
 +}
 +std::vector<bkstring> WCPattern::split(const bkstring & str, const bool keepEmptys, const unsigned long limit)
 +{
 +  unsigned long lim = (limit == 0 ? MAX_QMATCH : limit);
 +  int li = 0;
 +  std::vector<bkstring> ret;
 +
 +  matcher->setString(str);
 +
 +  while (matcher->findNextMatch() && ret.size() < lim)
 +  {
 +    if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back(L"");
 +    if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys)
 +    {
 +      if (li != matcher->getStartingIndex() || keepEmptys)
 +      {
 +        ret.push_back(str.substr(li, matcher->getStartingIndex() - li));
 +      }
 +      li = matcher->getEndingIndex();
 +    }
 +  }
 +  if (li < (int)str.size()) ret.push_back(str.substr(li));
 +
 +  return ret;
 +}
 +std::vector<bkstring> WCPattern::findAll(const bkstring & str)
 +{
 +  matcher->setString(str);
 +  return matcher->findAll();
 +}
 +bool WCPattern::matches(const bkstring & str)
 +{
 +  matcher->setString(str);
 +  return matcher->matches();
 +}
 +unsigned long WCPattern::getFlags() const
 +{
 +  return flags;
 +}
 +bkstring WCPattern::getWCPattern() const
 +{
 +  return pattern;
 +}
 +WCMatcher * WCPattern::createWCMatcher(const bkstring & str)
 +{
 +  return new WCMatcher(this, str);
 +}
 +
 +// NFAUNode
 +
 +NFAUNode::NFAUNode() { next = NULL; }
 +NFAUNode::~NFAUNode() { }
 +void NFAUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
 +{
 +  if (soFar.find(this) == soFar.end()) return;
 +  soFar[this] = 1;
 +  if (next) next->findAllNodes(soFar);
 +}
 +
 +// NFACharUNode
 +
 +NFACharUNode::NFACharUNode(const wchar_t c) { ch = c; }
 +int NFACharUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd < (int)str.size() && str[curInd] == ch) return next->match(str, matcher, curInd + 1);
 +  return -1;
 +}
 +
 +// NFACICharUNode
 +
 +NFACICharUNode::NFACICharUNode(const wchar_t c) { ch = to_lower(c); }
 +int NFACICharUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd < (int)str.size() && to_lower(str[curInd]) == ch) return next->match(str, matcher, curInd + 1);
 +  return -1;
 +}
 +
 +// NFAStartUNode
 +
 +NFAStartUNode::NFAStartUNode() { }
 +int NFAStartUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int ret = -1, ci = curInd;
 +
 +  matcher->starts[0] = curInd;
 +  if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) == (unsigned int)WCMatcher::MATCH_ENTIRE_STRING)
 +  {
 +    if (curInd != 0)
 +    {
 +      matcher->starts[0] = -1;
 +      return -1;
 +    }
 +    return next->match(str, matcher, 0);
 +  }
 +  while ((ret = next->match(str, matcher, ci)) == -1 && ci < (int)str.size())
 +  {
 +    matcher->clearGroups();
 +    matcher->starts[0] = ++ci;
 +  }
 +  if (ret < 0) matcher->starts[0] = -1;
 +  return ret;
 +}
 +
 +// NFAEndUNode
 +
 +NFAEndUNode::NFAEndUNode() { }
 +int NFAEndUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  matcher->ends[0] = curInd;
 +  if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) != 0)
 +  {
 +    if (curInd == (int)str.size()) return curInd;
 +    matcher->ends[0] = -1;
 +    return -1;
 +  }
 +  return curInd;
 +}
 +
 +// NFAQuantifierUNode
 +
 +void NFAQuantifierUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
 +{
 +  inner->findAllNodes(soFar);
 +  NFAUNode::findAllNodes(soFar);
 +}
 +NFAQuantifierUNode::NFAQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
 +{
 +  inner = internal;
 +  inner->next = pat->registerNode(new NFAAcceptUNode);
 +  min = (minMatch < WCPattern::MIN_QMATCH) ? WCPattern::MIN_QMATCH : minMatch;
 +  max = (maxMatch > WCPattern::MAX_QMATCH) ? WCPattern::MAX_QMATCH : maxMatch;
 +}
 +
 +int NFAQuantifierUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int i0, i1, i2 = 0;
 +
 +  i0 = i1 = curInd;
 +  while (i2 < min)
 +  {
 +
 +    ++i2;
 +    i1 = inner->match(str, matcher, i0);
 +    if (i1 <= i0) return i1; // i1 < i0 means i1 is -1
 +    i0 = i1;
 +  }
 +
 +  return i1;
 +}
 +// NFAGreedyQuantifierUNode
 +
 +NFAGreedyQuantifierUNode::NFAGreedyQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
 +                        : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
 +int NFAGreedyQuantifierUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int t = NFAQuantifierUNode::match(str, matcher, curInd);
 +  if (t != -1) return matchInternal(str, matcher, t, min);
 +  return t;
 +}
 +int NFAGreedyQuantifierUNode::matchInternal(const bkstring & str, WCMatcher * matcher, const int curInd, const int soFar) const
 +{
 +  if (soFar >= max) return next->match(str, matcher, curInd);
 +
 +  int i, j;
 +
 +  i = inner->match(str, matcher, curInd);
 +  if (i != -1)
 +  {
 +    j = matchInternal(str, matcher, i, soFar + 1);
 +    if (j != -1) return j;
 +  }
 +  return next->match(str, matcher, curInd);
 +}
 +
 +// NFALazyQuantifierUNode
 +
 +NFALazyQuantifierUNode::NFALazyQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
 +                      : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
 +int NFALazyQuantifierUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int i, j, m = NFAQuantifierUNode::match(str, matcher, curInd);
 +
 +  if (m == -1) return -1;
 +
 +  for (i = min; i < max; ++i)
 +  {
 +    j = next->match(str, matcher, m);
 +    if (j == -1)
 +    {
 +      j = inner->match(str, matcher, m);
 +      // if j < m, then j is -1, so we bail.
 +      // if j == m, then we would just go and call next->match on the same index,
 +      // but it already failed trying to match right there, so we know we can
 +      // just bail
 +      if (j <= m) return -1;
 +      m = j;
 +    }
 +    else return j;
 +  }
 +  return next->match(str, matcher, m);
 +}
 +
 +// NFAPossessiveQuantifierUNode
 +
 +NFAPossessiveQuantifierUNode::NFAPossessiveQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
 +                            : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
 +int NFAPossessiveQuantifierUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int i, j, m = NFAQuantifierUNode::match(str, matcher, curInd);
 +
 +  if (m == -1) return -1;
 +  for (i = min; i < max; ++i)
 +  {
 +    j = inner->match(str, matcher, m);
 +    if (j <= m) return next->match(str, matcher, m);
 +    m = j;
 +  }
 +  return next->match(str, matcher, m);
 +}
 +
 +// NFAAcceptUNode
 +
 +NFAAcceptUNode::NFAAcceptUNode() { }
 +int NFAAcceptUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (!next) return curInd;
 +  else return next->match(str, matcher, curInd);
 +}
 +
 +// NFAClassUNode
 +
 +NFAClassUNode::NFAClassUNode(const bool invert)
 +{
 +  inv = invert;
 +}
 +NFAClassUNode::NFAClassUNode(const bkstring & clazz, const bool invert)
 +{
 +  inv = invert;
 +  for (int i = 0; i < (int)clazz.size(); ++i) vals[clazz[i]] = 1;
 +}
 +int NFAClassUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd < (int)str.size() && ((vals.find(str[curInd]) != vals.end()) ^ inv))
 +  {
 +    return next->match(str, matcher, curInd + 1);
 +  }
 +  return -1;
 +}
 +
 +// NFACIClassUNode
 +
 +NFACIClassUNode::NFACIClassUNode(const bool invert)
 +{
 +  inv = invert;
 +}
 +NFACIClassUNode::NFACIClassUNode(const bkstring & clazz, const bool invert)
 +{
 +  inv = invert;
 +  for (int i = 0; i < (int)clazz.size(); ++i) vals[to_lower(clazz[i])] = 1;
 +}
 +int NFACIClassUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd < (int)str.size() && ((vals.find(to_lower(str[curInd])) != vals.end()) ^ inv))
 +  {
 +    return next->match(str, matcher, curInd + 1);
 +  }
 +  return -1;
 +}
 +
 +// NFASubStartUNode
 +
 +NFASubStartUNode::NFASubStartUNode() { }
 +int NFASubStartUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  return next->match(str, matcher, curInd);
 +}
 +
 +// NFAOrUNode
 +
 +NFAOrUNode::NFAOrUNode(NFAUNode * first, NFAUNode * second) : one(first), two(second) { }
 +void NFAOrUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
 +{
 +  if (one) one->findAllNodes(soFar);
 +  if (two) two->findAllNodes(soFar);
 +  NFAUNode::findAllNodes(soFar);
 +}
 +int NFAOrUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int ci = one->match(str, matcher, curInd);
 +
 +  if (ci != -1) ci = next->match(str, matcher, ci);
 +  if (ci != -1) return ci;
 +  if (ci == -1) ci = two->match(str, matcher, curInd);
 +  if (ci != -1) ci = next->match(str, matcher, ci);
 +  return ci;
 +}
 +
 +// NFAQuoteUNode
 +
 +NFAQuoteUNode::NFAQuoteUNode(const bkstring & quoted) : qStr(quoted) { }
 +int NFAQuoteUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd + qStr.size() > str.size())       return -1;
 +  if (str.substr(curInd, qStr.size()) != qStr) return -1;
 +  return next->match(str, matcher, curInd + (int)qStr.size());
 +}
 +
 +// NFACIQuoteUNode
 +
 +NFACIQuoteUNode::NFACIQuoteUNode(const bkstring & quoted) : qStr(quoted) { }
 +int NFACIQuoteUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd + qStr.size() > str.size()) return -1;
 +  if (str_icmp(str.substr(curInd, qStr.size()).c_str(),  qStr.c_str())) return -1;
 +  return next->match(str, matcher, (int)qStr.size());
 +}
 +
 +// NFALookAheadUNode
 +
 +NFALookAheadUNode::NFALookAheadUNode(NFAUNode * internal, const bool positive) : NFAUNode(), pos(positive), inner(internal) { }
 +void NFALookAheadUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
 +{
 +  if (inner) inner->findAllNodes(soFar);
 +  NFAUNode::findAllNodes(soFar);
 +}
 +int NFALookAheadUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  return ((inner->match(str, matcher, curInd) == -1) ^ pos) ? next->match(str, matcher, curInd) : -1;
 +}
 +
 +// NFALookBehindUNode
 +
 +NFALookBehindUNode::NFALookBehindUNode(const bkstring & str, const bool positive) : pos(positive), mStr(str) { }
 +int NFALookBehindUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (pos)
 +  {
 +    if (curInd < (int)mStr.size()) return -1;
 +    if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return next->match(str, matcher, curInd);
 +  }
 +  else
 +  {
 +    if (curInd < (int)mStr.size()) return next->match(str, matcher, curInd);
 +    if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return -1;
 +     return next->match(str, matcher, curInd);
 +  }
 +  return -1;
 +}
 +
 +// NFAStartOfLineUNode
 +
 +NFAStartOfLineUNode::NFAStartOfLineUNode() { }
 +int NFAStartOfLineUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd == 0 || str[curInd - 1] == (wchar_t)'\n' || str[curInd - 1] == (wchar_t)'\r')
 +  {
 +    return next->match(str, matcher, curInd);
 +  }
 +  return -1;
 +}
 +
 +// NFAEndOfLineUNode
 +
 +NFAEndOfLineUNode::NFAEndOfLineUNode() { }
 +int NFAEndOfLineUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd >= (int)str.size() || str[curInd] == (wchar_t)'\n' || str[curInd] == (wchar_t)'\r')
 +  {
 +    return next->match(str, matcher, curInd);
 +  }
 +  return -1;
 +}
 +
 +// NFAReferenceUNode
 +
 +NFAReferenceUNode::NFAReferenceUNode(const int groupIndex) : gi(groupIndex) { }
 +int NFAReferenceUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int len = matcher->ends[gi] - matcher->starts[gi];
 +  int ni = -1;
 +  if      (gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0)             ni = curInd;
 +  else if (curInd + len > (int)str.size())                                            return -1;
 +  else if (str.substr(curInd, len) != str.substr(matcher->starts[gi], len))  return -1;
 +  else                                                                                ni = curInd + len;
 +
 +  return next->match(str, matcher, ni);
 +}
 +
 +// NFAStartOfInputUNode
 +
 +NFAStartOfInputUNode::NFAStartOfInputUNode() { }
 +int NFAStartOfInputUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd == 0) return next->match(str, matcher, curInd);
 +  return -1;
 +}
 +
 +// NFAEndOfInputUNode
 +
 +NFAEndOfInputUNode::NFAEndOfInputUNode(const bool lookForTerm) : term(lookForTerm) { }
 +int NFAEndOfInputUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int len = (int)str.size();
 +  if      (curInd == len) return next->match(str, matcher, curInd);
 +  else if (term)
 +  {
 +    if      (curInd == len - 1 && (str[curInd] == (wchar_t)'\r' || str[curInd] == (wchar_t)'\n'))
 +    {
 +      return next->match(str, matcher, curInd);
 +    }
 +    else if (curInd == len - 2 && str.substr(curInd, 2) == L"\r\n")
 +    {
 +      return next->match(str, matcher, curInd);
 +    }
 +  }
 +  return -1;
 +}
 +
 +// NFAWordBoundaryUNode
 +
 +NFAWordBoundaryUNode::NFAWordBoundaryUNode(const bool positive) : pos(positive) { }
 +int NFAWordBoundaryUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int len = (int)str.size();
 +
 +  wchar_t c1 = (curInd - 1 < len && curInd > 0) ? str[curInd - 1] : '\n';
 +  wchar_t c2 = (curInd     < len) ? str[curInd    ] : '\n';
 +
 +  if      (curInd == len) return next->match(str, matcher, curInd);
 +  bool ok = is_alpha(c1) != is_alpha(c2);
 +  if (ok && pos) return next->match(str, matcher, curInd);
 +  return -1;
 +}
 +
 +// NFAEndOfMatchUNode
 +
 +NFAEndOfMatchUNode::NFAEndOfMatchUNode() { }
 +int NFAEndOfMatchUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  if (curInd == matcher->lm) return next->match(str, matcher, curInd);
 +  return -1;
 +}
 +
 +// NFAGroupHeadUNode
 +
 +NFAGroupHeadUNode::NFAGroupHeadUNode(const int groupIndex) : gi(groupIndex) { }
 +int NFAGroupHeadUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int ret, o = matcher->starts[gi];
 +
 +  matcher->starts[gi] = curInd;
 +  ret = next->match(str, matcher, curInd);
 +  if (ret < 0) matcher->starts[gi] = o;
 +
 +  return ret;
 +}
 +
 +// NFAGroupTailUNode
 +
 +NFAGroupTailUNode::NFAGroupTailUNode(const int groupIndex) : gi(groupIndex) { }
 +int NFAGroupTailUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int ret, o = matcher->ends[gi];
 +
 +  matcher->ends[gi] = curInd;
 +  ret = next->match(str, matcher, curInd);
 +  if (ret < 0) matcher->ends[gi] = o;
 +
 +  return ret;
 +}
 +
 +// NFAGroupLoopPrologueUNode
 +
 +NFAGroupLoopPrologueUNode::NFAGroupLoopPrologueUNode(const int groupIndex) : gi(groupIndex) { }
 +int NFAGroupLoopPrologueUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi];
 +
 +  matcher->groups[gi] = 0;
 +  matcher->groupPos[gi] = 0;
 +  matcher->groupIndeces[gi] = -1;
 +  ret = next->match(str, matcher, curInd);
 +  if (ret < 0)
 +  {
 +    matcher->groups[gi] = o1;
 +    matcher->groupPos[gi] = o2;
 +    matcher->groupIndeces[gi] = o3;
 +  }
 +
 +  return ret;
 +}
 +
 +// NFAGroupLoopUNode
 +
 +NFAGroupLoopUNode::NFAGroupLoopUNode(NFAUNode * internal, const int minMatch, const int maxMatch,
 +                                   const int groupIndex, const int matchType)
 +{
 +  inner = internal;
 +  min = minMatch;
 +  max = maxMatch;
 +  gi = groupIndex;
 +  type = matchType;
 +}
 +void NFAGroupLoopUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
 +{
 +  if (inner) inner->findAllNodes(soFar);
 +  NFAUNode::findAllNodes(soFar);
 +}
 +int NFAGroupLoopUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  bool b = (curInd > matcher->groupIndeces[gi]);
 +
 +  if (b && matcher->groups[gi] < min)
 +  {
 +    ++matcher->groups[gi];
 +    int o = matcher->groupIndeces[gi];
 +    matcher->groupIndeces[gi] = curInd;
 +    int ret = inner->match(str, matcher, curInd);
 +    if (ret < 0)
 +    {
 +      matcher->groupIndeces[gi] = o;
 +      --matcher->groups[gi];
 +    }
 +    return ret;
 +  }
 +  else if (!b || matcher->groups[gi] >= max)
 +  {
 +    return next->match(str, matcher, curInd);
 +  }
 +  else
 +  {
 +    switch (type)
 +    {
 +    case 0: return matchGreedy(str, matcher, curInd);
 +    case 1: return matchLazy(str, matcher, curInd);
 +    case 2: return matchPossessive(str, matcher, curInd);
 +    }
 +  }
 +  return -1;
 +}
 +int NFAGroupLoopUNode::matchGreedy(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int o = matcher->groupIndeces[gi];            // save our info for backtracking
 +  matcher->groupIndeces[gi] = curInd;           // move along
 +  ++matcher->groups[gi];
 +  int ret = inner->match(str, matcher, curInd); // match internally
 +  if (ret < 0)
 +  {                                             // if we failed, then restore info and match next
 +    --matcher->groups[gi];
 +    matcher->groupIndeces[gi] = o;
 +    ret = next->match(str, matcher, curInd);
 +  }
 +  return ret;
 +}
 +int NFAGroupLoopUNode::matchLazy(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int ret = next->match(str, matcher, curInd);  // be lazy, just go on
 +  if (ret < 0)
 +  {
 +    int o = matcher->groupIndeces[gi];          // save info for backtracking
 +    matcher->groupIndeces[gi] = curInd;         // advance our position
 +    ++matcher->groups[gi];
 +    ret = inner->match(str, matcher, curInd);   // match our internal stuff
 +    if (ret < 0)                                // if we failed, then restore the info
 +    {
 +      --matcher->groups[gi];
 +      matcher->groupIndeces[gi] = o;
 +    }
 +  }
 +  return ret;
 +}
 +int NFAGroupLoopUNode::matchPossessive(const bkstring & str, WCMatcher * matcher, const int curInd) const
 +{
 +  int o = matcher->groupIndeces[gi];            // save info for backtracking
 +  matcher->groupPos[gi] = matcher->groups[gi];  // set a flag stating we have matcher at least this much
 +  matcher->groupIndeces[gi] = curInd;           // move along
 +  ++matcher->groups[gi];
 +  int ret = inner->match(str, matcher, curInd); // try and match again
 +  if (ret < 0)
 +  {                                             // if we fail, back off, but to an extent
 +    --matcher->groups[gi];
 +    matcher->groupIndeces[gi] = o;
 +    if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd);
 +  }
 +  return ret;
 +}
 +
 +#ifdef _WIN32
 +  #pragma warning(pop)
 +#endif
 diff --git a/plugins/SmileyAdd/src/regexp/WCPattern.h b/plugins/SmileyAdd/src/regexp/WCPattern.h new file mode 100644 index 0000000000..3d52a7fd2e --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/WCPattern.h @@ -0,0 +1,1663 @@ +#ifndef __WCPATTERN_H__
 +#define __WCPATTERN_H__
 +
 +#ifdef _WIN32
 +  #pragma warning(disable:4786)
 +#endif
 +
 +#include "bkstring.h"
 +
 +#include <vector>
 +#include <map>
 +
 +class WCMatcher;
 +class NFAUNode;
 +class NFAQuantifierUNode;
 +
 +/**
 +  This pattern class is very similar in functionality to Java's
 +  java.util.regex.WCPattern class. The pattern class represents an immutable
 +  regular expression object. Instead of having a single object contain both the
 +  regular expression object and the matching object, instead the two objects are
 +  split apart. The {@link WCMatcher WCMatcher} class represents the maching
 +  object.
 +
 +  The WCPattern class works primarily off of "compiled" patterns. A typical
 +  instantiation of a regular expression looks like:
 +
 +  <pre>
 +  WCPattern * p = WCPattern::compile(L"a*b");
 +  WCMatcher * m = p->createWCMatcher(L"aaaaaab");
 +  if (m->matches()) ...
 +  </pre>
 +
 +  However, if you do not need to use a pattern more than once, it is often times
 +  okay to use the WCPattern's static methods insteads. An example looks like this:
 +
 +  <pre>
 +  if (WCPattern::matches(L"a*b", L"aaaab")) { ... }
 +  </pre>
 +
 +  This class does not currently support unicode. The unicode update for this
 +  class is coming soon.
 +
 +  This class is partially immutable. It is completely safe to call createWCMatcher
 +  concurrently in different threads, but the other functions (e.g. split) should
 +  not be called concurrently on the same <code>WCPattern</code>.
 +
 +  <table border="0" cellpadding="1" cellspacing="0">
 +    <tr align="left" bgcolor="#CCCCFF">
 +      <td>
 +        <b>Construct</b>
 +      </td>
 +      <td>
 +        <b>Matches</b>
 +      </th>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +       
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Characters</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x</i></code>
 +      </td>
 +      <td>
 +        The character <code><i>x</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\\</code>
 +      </td>
 +      <td>
 +        The character <code>\</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\0<i>nn</i></code>
 +      </td>
 +      <td>
 +        The character with octal ASCII value <code><i>nn</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\0<i>nnn</i></code>
 +      </td>
 +      <td>
 +        The character with octal ASCII value <code><i>nnn</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\x<i>hh</i></code>
 +      </td>
 +      <td>
 +        The character with hexadecimal ASCII value <code><i>hh</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\t</code>
 +      </td>
 +      <td>
 +        A tab character
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\r</code>
 +      </td>
 +      <td>
 +        A carriage return character
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\n</code>
 +      </td>
 +      <td>
 +        A new-line character
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <b>Character Classes</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[abc]</code>
 +      </td>
 +      <td>
 +        Either <code>a</code>, <code>b</code>, or <code>c</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[^abc]</code>
 +      </td>
 +      <td>
 +        Any character but <code>a</code>, <code>b</code>, or <code>c</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a-zA-Z]</code>
 +      </td>
 +      <td>
 +        Any character ranging from <code>a</code> thru <code>z</code>, or
 +        <code>A</code> thru <code>Z</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[^a-zA-Z]</code>
 +      </td>
 +      <td>
 +        Any character except those ranging from <code>a</code> thru
 +        <code>z</code>, or <code>A</code> thru <code>Z</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a\-z]</code>
 +      </td>
 +      <td>
 +        Either <code>a</code>, <code>-</code>, or <code>z</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a-z[A-Z]]</code>
 +      </td>
 +      <td>
 +        Same as <code>[a-zA-Z]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a-z&&[g-i]]</code>
 +      </td>
 +      <td>
 +        Any character in the intersection of <code>a-z</code> and
 +        <code>g-i</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>[a-z&&[^g-i]]</code>
 +      </td>
 +      <td>
 +        Any character in <code>a-z</code> and not in <code>g-i</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Prefefined character classes</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><b>.</b></code>
 +      </td>
 +      <td>
 +        Any character. Multiline matching must be compiled into the pattern for
 +        <code><b>.</b></code> to match a <code>\r</code> or a <code>\n</code>.
 +        Even if multiline matching is enabled, <code><b>.</b></code> will not
 +        match a <code>\r\n</code>, only a <code>\r</code> or a <code>\n</code>.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\d</code>
 +      </td>
 +      <td>
 +        <code>[0-9]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\D</code>
 +      </td>
 +      <td>
 +        <code>[^\d]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\s</code>
 +      </td>
 +      <td>
 +        <code>[ \t\r\n\x0B]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\S</code>
 +      </td>
 +      <td>
 +        <code>[^\s]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\w</code>
 +      </td>
 +      <td>
 +        <code>[a-zA-Z0-9_]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\W</code>
 +      </td>
 +      <td>
 +        <code>[^\w]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>POSIX character classes
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Lower}</code>
 +      </td>
 +      <td>
 +        <code>[a-z]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Upper}</code>
 +      </td>
 +      <td>
 +        <code>[A-Z]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{ASCII}</code>
 +      </td>
 +      <td>
 +        <code>[\x00-\x7F]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Alpha}</code>
 +      </td>
 +      <td>
 +        <code>[a-zA-Z]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Digit}</code>
 +      </td>
 +      <td>
 +        <code>[0-9]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Alnum}</code>
 +      </td>
 +      <td>
 +        <code>[\w&&[^_]]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{Punct}</code>
 +      </td>
 +      <td>
 +        <code>[!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\p{XDigit}</code>
 +      </td>
 +      <td>
 +        <code>[a-fA-F0-9]</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Boundary Matches</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>^</code>
 +      </td>
 +      <td>
 +        The beginning of a line. Also matches the beginning of input.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>$</code>
 +      </td>
 +      <td>
 +        The end of a line. Also matches the end of input.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\b</code>
 +      </td>
 +      <td>
 +        A word boundary
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\B</code>
 +      </td>
 +      <td>
 +        A non word boundary
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\A</code>
 +      </td>
 +      <td>
 +        The beginning of input
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\G</code>
 +      </td>
 +      <td>
 +        The end of the previous match. Ensures that a "next" match will only
 +        happen if it begins with the character immediately following the end of
 +        the "current" match.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\Z</code>
 +      </td>
 +      <td>
 +        The end of input. Will also match if there is a single trailing
 +        <code>\r\n</code>, a single trailing <code>\r</code>, or a single
 +        trailing <code>\n</code>.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\z</code>
 +      </td>
 +      <td>
 +        The end of input
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Greedy Quantifiers</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, either zero times or one time
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x*</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, zero or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, one or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n}</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, exactly n times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,}</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{,m}</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at most <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,m}</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times and at most
 +        <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Possessive Quantifiers</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x?+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, either zero times or one time
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x*+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, zero or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x++</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, one or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n}+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, exactly n times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,}+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{,m}+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at most <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,m}+</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times and at most
 +        <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Reluctant Quantifiers</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x??</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, either zero times or one time
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x*?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, zero or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x+?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, one or more times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n}?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, exactly n times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,}?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{,m}?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at most <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x{n,m}?</i></code>
 +      </td>
 +      <td>
 +        <i>x</i>, at least <code><i>n</i></code> times and at most
 +        <code><i>m</i></code> times
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Operators</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>xy</i></code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code> then <code><i>y</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code><i>x</i></code>|<code><i>y</i></code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code> or <code><i>y</i></code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code> as a capturing group
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Quoting</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\Q</code>
 +      </td>
 +      <td>
 +        Nothing, but treat every character (including \s) literally until a
 +        matching <code>\E</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>\E</code>
 +      </td>
 +      <td>
 +        Nothing, but ends its matching <code>\Q</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Special Constructs</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?:<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, but not as a capturing group
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?=<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, via positive lookahead. This means that the
 +        expression will match only if it is trailed by <code><i>x</i></code>.
 +        It will not "eat" any of the characters matched by
 +        <code><i>x</i></code>.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?!<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, via negative lookahead. This means that the
 +        expression will match only if it is not trailed by
 +        <code><i>x</i></code>. It will not "eat" any of the characters
 +        matched by <code><i>x</i></code>.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?<=<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, via positive lookbehind. <code><i>x</i></code>
 +        cannot contain any quantifiers.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?<!<i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i></code>, via negative lookbehind. <code><i>x</i></code>
 +        cannot contain any quantifiers.
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>(?><i>x</i>)</code>
 +      </td>
 +      <td>
 +        <code><i>x</i>{1}+</code>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +         
 +      </td>
 +    </tr>
 +    <tr>
 +      <td colspan="2">
 +        <b>Registered Expression Matching</b>
 +      </td>
 +    </tr>
 +    <tr>
 +      <td>
 +        <code>{<i>x</i>}</code>
 +      </td>
 +      <td>
 +        The registered pattern <code><i>x</i></code>
 +      </td>
 +    </tr>
 +  </table>
 +
 +  <hr>
 +
 +   <i>Begin Text Extracted And Modified From java.util.regex.WCPattern documentation</i>
 +
 +   <h4> Backslashes, escapes, and quoting </h4>
 +
 +   <p> The backslash character (<tt>(wchar_t)'\'</tt>) serves to introduce escaped
 +   constructs, as defined in the table above, as well as to quote characters
 +   that otherwise would be interpreted as unescaped constructs.  Thus the
 +   expression <tt>\\</tt> matches a single backslash and <tt>\{</tt> matches a
 +   left brace.
 +
 +   <p> It is an error to use a backslash prior to any alphabetic character that
 +   does not denote an escaped construct; these are reserved for future
 +   extensions to the regular-expression language.  A backslash may be used
 +   prior to a non-alphabetic character regardless of whether that character is
 +   part of an unescaped construct.
 +
 +   <p>It is necessary to double backslashes in string literals that represent
 +   regular expressions to protect them from interpretation by a compiler.  The
 +   string literal <tt>"\b"</tt>, for example, matches a single backspace
 +   character when interpreted as a regular expression, while
 +   <tt>"\\b"</tt> matches a word boundary.  The string litera
 +    <tt>"\(hello\)"</tt> is illegal and leads to a compile-time error;
 +    in order to match the string <tt>(hello)</tt> the string literal
 +    <tt>"\\(hello\\)"</tt> must be used.
 +
 +   <h4> Character Classes </h4>
 +
 +      <p> Character classes may appear within other character classes, and
 +      may be composed by the union operator (implicit) and the intersection
 +      operator (<tt>&&</tt>).
 +      The union operator denotes a class that contains every character that is
 +      in at least one of its operand classes.  The intersection operator
 +      denotes a class that contains every character that is in both of its
 +      operand classes.
 +
 +      <p> The precedence of character-class operators is as follows, from
 +      highest to lowest:
 +
 +      <blockquote><table border="0" cellpadding="1" cellspacing="0"
 +                   summary="Precedence of character class operators.">
 +
 +        <tr><th>1    </th>
 +            <td>Literal escape    </td>
 +            <td><tt>\x</tt></td></tr>
 +        <tr><th>2    </th>
 +            <td>Range</td>
 +            <td><tt>a-z</tt></td></tr>
 +        <tr><th>3    </th>
 +            <td>Grouping</td>
 +            <td><tt>[...]</tt></td></tr>
 +        <tr><th>4    </th>
 +            <td>Intersection</td>
 +            <td><tt>[a-z&&[aeiou]]</tt></td></tr>
 +        <tr><th>5    </th>
 +            <td>Union</td>
 +            <td><tt>[a-e][i-u]<tt></td></tr>
 +      </table></blockquote>
 +
 +      <p> Note that a different set of metacharacters are in effect inside
 +      a character class than outside a character class. For instance, the
 +      regular expression <tt>.</tt> loses its special meaning inside a
 +      character class, while the expression <tt>-</tt> becomes a range
 +      forming metacharacter.
 +
 +   <a name="lt">
 +
 +   <a name="cg">
 +   <h4> Groups and capturing </h4>
 +
 +   <p> Capturing groups are numbered by counting their opening parentheses from
 +   left to right.  In the expression <tt>((A)(B(C)))</tt>, for example, there
 +   are four such groups: </p>
 +
 +   <blockquote><table cellpadding=1 cellspacing=0 summary="Capturing group numberings">
 +
 +   <tr><th>1    </th>
 +       <td><tt>((A)(B(C)))</tt></td></tr>
 +   <tr><th>2    </th>
 +       <td><tt>(A)</tt></td></tr>
 +   <tr><th>3    </th>
 +       <td><tt>(B(C))</tt></td></tr>
 +
 +   <tr><th>4    </th>
 +       <td><tt>(C)</tt></td></tr>
 +   </table></blockquote>
 +
 +   <p> Group zero always stands for the entire expression.
 +
 +   <p> Capturing groups are so named because, during a match, each subsequence
 +   of the input sequence that matches such a group is saved.  The captured
 +   subsequence may be used later in the expression, via a back reference, and
 +   may also be retrieved from the matcher once the match operation is complete.
 +
 +   <p> The captured input associated with a group is always the subsequence
 +   that the group most recently matched.  If a group is evaluated a second time
 +   because of quantification then its previously-captured value, if any, will
 +   be retained if the second evaluation fails.  Matching the string
 +   <tt>L"aba"</tt> against the expression <tt>(a(b)?)+</tt>, for example, leaves
 +   group two set to <tt>L"b"</tt>.  All captured input is discarded at the
 +   beginning of each match.
 +
 +   <p> Groups beginning with <tt>(?</tt> are pure, <i>non-capturing</i> groups
 +   that do not capture text and do not count towards the group total.
 +
 +
 +   <h4> WC support </h4>
 +
 +   <p> Coming Soon.
 +
 +   <h4> Comparison to Perl 5 </h4>
 +
 +   <p>The <code>WCPattern</code> engine performs traditional NFA-based matching
 +   with ordered alternation as occurs in Perl 5.
 +
 +   <p> Perl constructs not supported by this class: </p>
 +
 +   <ul>
 +
 +      <li><p> The conditional constructs <tt>(?{</tt><i>X</i><tt>})</tt> and
 +      <tt>(?(</tt><i>condition</i><tt>)</tt><i>X</i><tt>|</tt><i>Y</i><tt>)</tt>,
 +      </p></li>
 +
 +      <li><p> The embedded code constructs <tt>(?{</tt><i>code</i><tt>})</tt>
 +      and <tt>(??{</tt><i>code</i><tt>})</tt>,</p></li>
 +
 +      <li><p> The embedded comment syntax <tt>(?#comment)</tt>, and </p></li>
 +
 +      <li><p> The preprocessing operations <tt>\l</tt> <tt>\u</tt>,
 +      <tt>\L</tt>, and <tt>\U</tt>.  </p></li>
 +
 +      <li><p> Embedded flags</p></li>
 +
 +   </ul>
 +
 +   <p> Constructs supported by this class but not by Perl: </p>
 +
 +   <ul>
 +
 +      <li><p> Possessive quantifiers, which greedily match as much as they can
 +      and do not back off, even when doing so would allow the overall match to
 +      succeed.  </p></li>
 +
 +      <li><p> Character-class union and intersection as described
 +      above.</p></li>
 +
 +   </ul>
 +
 +   <p> Notable differences from Perl: </p>
 +
 +   <ul>
 +
 +      <li><p> In Perl, <tt>\1</tt> through <tt>\9</tt> are always interpreted
 +      as back references; a backslash-escaped number greater than <tt>9</tt> is
 +      treated as a back reference if at least that many subexpressions exist,
 +      otherwise it is interpreted, if possible, as an octal escape.  In this
 +      class octal escapes must always begin with a zero. In this class,
 +      <tt>\1</tt> through <tt>\9</tt> are always interpreted as back
 +      references, and a larger number is accepted as a back reference if at
 +      least that many subexpressions exist at that point in the regular
 +      expression, otherwise the parser will drop digits until the number is
 +      smaller or equal to the existing number of groups or it is one digit.
 +      </p></li>
 +
 +      <li><p> Perl uses the <tt>g</tt> flag to request a match that resumes
 +      where the last match left off.  This functionality is provided implicitly
 +      by the <CODE>WCMatcher</CODE> class: Repeated invocations of the
 +      <code>find</code> method will resume where the last match left off,
 +      unless the matcher is reset.  </p></li>
 +
 +      <li><p> Perl is forgiving about malformed matching constructs, as in the
 +      expression <tt>*a</tt>, as well as dangling brackets, as in the
 +      expression <tt>abc]</tt>, and treats them as literals.  This
 +      class also strict and will not compile a pattern when dangling characters
 +      are encountered.</p></li>
 +
 +   </ul>
 +
 +
 +   <p> For a more precise description of the behavior of regular expression
 +   constructs, please see <a href="http://www.oreilly.com/catalog/regex2/">
 +   <i>Mastering Regular Expressions, 2nd Edition</i>, Jeffrey E. F. Friedl,
 +   O'Reilly and Associates, 2002.</a>
 +   </p>
 +  <P>
 +
 +  <i>End Text Extracted And Modified From java.util.regex.WCPattern documentation</i>
 +
 +  <hr>
 +
 +  @author    Jeffery Stuart
 +  @since     March 2003, Stable Since November 2004
 +  @version   1.07.00
 +  @memo      A class used to represent "PERL 5"-ish regular expressions
 + */
 +class WCPattern
 +{
 +  friend class WCMatcher;
 +  friend class NFAUNode;
 +  friend class NFAQuantifierUNode;
 +  private:
 +    /**
 +      This constructor should not be called directly. Those wishing to use the
 +      WCPattern class should instead use the {@link compile compile} method.
 +
 +      @param rhs The pattern to compile
 +      @memo Creates a new pattern from the regular expression in <code>rhs</code>.
 +     */
 +    WCPattern(const bkstring & rhs);
 +  protected:
 +    /**
 +      This currently is not used, so don't try to do anything with it.
 +      @memo Holds all the compiled patterns for quick access.
 +     */
 +    static std::map<bkstring, WCPattern *> compiledWCPatterns;
 +    /**
 +      Holds all of the registered patterns as strings. Due to certain problems
 +      with compilation of patterns, especially with capturing groups, this seemed
 +      to be the best way to do it.
 +     */
 +    static std::map<bkstring, std::pair<bkstring, unsigned long> > registeredWCPatterns;
 +  protected:
 +    /**
 +      Holds all the NFA nodes used. This makes deletion of a pattern, as well as
 +      clean-up from an unsuccessful compile much easier and faster.
 +     */
 +    std::map<NFAUNode*, bool> nodes;
 +    /**
 +      Used when methods like split are called. The matcher class uses a lot of
 +      dynamic memeory, so having an instance increases speedup of certain
 +      operations.
 +     */
 +    WCMatcher * matcher;
 +    /**
 +      The front node of the NFA.
 +     */
 +    NFAUNode * head;
 +    /**
 +      The actual regular expression we rerpesent
 +      */
 +    bkstring pattern;
 +    /**
 +      Flag used during compilation. Once the pattern is successfully compiled,
 +      <code>error</code> is no longer used.
 +     */
 +    bool error;
 +    /**
 +      Used during compilation to keep track of the current index into
 +      <code>{@link pattern pattern}<code>.  Once the pattern is successfully
 +      compiled, <code>error</code> is no longer used.
 +     */
 +    int curInd;
 +    /**
 +      The number of capture groups this contains.
 +     */
 +    int groupCount;
 +    /**
 +      The number of non-capture groups this contains.
 +     */
 +    int nonCapGroupCount;
 +    /**
 +      The flags specified when this was compiled.
 +     */
 +    unsigned long flags;
 +  protected:
 +    /**
 +      Raises an error during compilation. Compilation will cease at that point
 +      and compile will return <code>NULL</code>.
 +     */
 +    void raiseError();
 +    /**
 +      Convenience function for registering a node in <code>nodes</code>.
 +      @param node The node to register
 +      @return The registered node
 +     */
 +    NFAUNode * registerNode(NFAUNode * node);
 +
 +    /**
 +      Calculates the union of two strings. This function will first sort the
 +      strings and then use a simple selection algorithm to find the union.
 +      @param s1 The first "class" to union
 +      @param s2 The second "class" to union
 +      @return A new string containing all unique characters. Each character
 +              must have appeared in one or both of <code>s1</code> and
 +              <code>s2</code>.
 +     */
 +    bkstring classUnion      (bkstring s1, bkstring s2)  const;
 +    /**
 +      Calculates the intersection of two strings. This function will first sort
 +      the strings and then use a simple selection algorithm to find the
 +      intersection.
 +      @param s1 The first "class" to intersect
 +      @param s2 The second "class" to intersect
 +      @return A new string containing all unique characters. Each character
 +              must have appeared both <code>s1</code> and <code>s2</code>.
 +     */
 +    bkstring classIntersect  (bkstring s1, bkstring s2)  const;
 +    /**
 +      Calculates the negation of a string. The negation is the set of all
 +      characters between <code>\x00</code> and <code>\xFF</code> not
 +      contained in <code>s1</code>.
 +      @param s1 The "class" to be negated.
 +      @param s2 The second "class" to intersect
 +      @return A new string containing all unique characters. Each character
 +              must have appeared both <code>s1</code> and <code>s2</code>.
 +     */
 +    bkstring classNegate     (bkstring s1)                  const;
 +    /**
 +      Creates a new "class" representing the range from <code>low</code> thru
 +      <code>hi</code>. This function will wrap if <code>low</code> >
 +      <code>hi</code>. This is a feature, not a buf. Sometimes it is useful
 +      to be able to say [\x70-\x10] instead of [\x70-\x7F\x00-\x10].
 +      @param low The beginning character
 +      @param hi  The ending character
 +      @return A new string containing all the characters from low thru hi.
 +     */
 +    bkstring classCreateRange(wchar_t low,       wchar_t hi)         const;
 +
 +    /**
 +      Extracts a decimal number from the substring of member-variable
 +      <code>{@link pattern pattern}<code> starting at <code>start</code> and
 +      ending at <code>end</code>.
 +      @param start The starting index in <code>{@link pattern pattern}<code>
 +      @param end The last index in <code>{@link pattern pattern}<code>
 +      @return The decimal number in <code>{@link pattern pattern}<code>
 +     */
 +    int getInt(int start, int end);
 +    /**
 +      Parses a <code>{n,m}</code> string out of the member-variable
 +      <code>{@link pattern pattern}<code> stores the result in <code>sNum</code>
 +      and <code>eNum</code>.
 +      @param sNum Output parameter. The minimum number of matches required
 +                  by the curly quantifier are stored here.
 +      @param eNum Output parameter. The maximum number of matches allowed
 +                  by the curly quantifier are stored here.
 +      @return Success/Failure. Fails when the curly does not have the proper
 +              syntax
 +     */
 +    bool quantifyCurly(int & sNum, int & eNum);
 +    /**
 +      Tries to quantify the currently parsed group. If the group being parsed
 +      is indeed quantified in the member-variable
 +      <code>{@link pattern pattern}<code>, then the NFA is modified accordingly.
 +      @param start  The starting node of the current group being parsed
 +      @param stop   The ending node of the current group being parsed
 +      @param gn     The group number of the current group being parsed
 +      @return       The node representing the starting node of the group. If the
 +                    group becomes quantified, then this node is not necessarily
 +                    a GroupHead node.
 +     */
 +    NFAUNode * quantifyGroup(NFAUNode * start, NFAUNode * stop, const int gn);
 +
 +    /**
 +      Tries to quantify the last parsed expression. If the character was indeed
 +      quantified, then the NFA is modified accordingly.
 +      @param newNode The recently created expression node
 +      @return The node representing the last parsed expression. If the
 +              expression was quantified, <code>return value != newNode</code>
 +     */
 +    NFAUNode * quantify(NFAUNode * newNode);
 +    /**
 +      Parses the current class being examined in
 +      <code>{@link pattern pattern}</code>.
 +      @return A string of unique characters contained in the current class being
 +              parsed
 +     */
 +    bkstring parseClass();
 +    /**
 +      Parses the current POSIX class being examined in
 +      <code>{@link pattern pattern}</code>.
 +      @return A string of unique characters representing the POSIX class being
 +              parsed
 +     */
 +    bkstring parsePosix();
 +    /**
 +      Returns a string containing the octal character being parsed
 +      @return The string contained the octal value being parsed
 +     */
 +    bkstring parseOctal();
 +    /**
 +      Returns a string containing the hex character being parsed
 +      @return The string contained the hex value being parsed
 +     */
 +    bkstring parseHex();
 +    /**
 +      Returns a new node representing the back reference being parsed
 +      @return The new node representing the back reference being parsed
 +     */
 +    NFAUNode *   parseBackref();
 +    /**
 +      Parses the escape sequence currently being examined. Determines if the
 +      escape sequence is a class, a single character, or the beginning of a
 +      quotation sequence.
 +      @param inv Output parameter. Whether or not to invert the returned class
 +      @param quo Output parameter. Whether or not this sequence starts a
 +                 quotation.
 +      @return The characters represented by the class
 +     */
 +    bkstring parseEscape(bool & inv, bool & quo);
 +    /**
 +      Parses a supposed registered pattern currently under compilation. If the
 +      sequence of characters does point to a registered pattern, then the
 +      registered pattern is appended to <code>*end<code>. The registered pattern
 +      is parsed with the current compilation flags.
 +      @param end The ending node of the thus-far compiled pattern
 +      @return The new end node of the current pattern
 +     */
 +    NFAUNode * parseRegisteredWCPattern(NFAUNode ** end);
 +    /**
 +      Parses a lookbehind expression. Appends the necessary nodes
 +      <code>*end</code>.
 +      @param pos Positive or negative look behind
 +      @param end The ending node of the current pattern
 +      @return The new end node of the current pattern
 +     */
 +    NFAUNode * parseBehind(const bool pos, NFAUNode ** end);
 +    /**
 +      Parses the current expression and tacks on nodes until a \E is found.
 +      @return The end of the current pattern
 +     */
 +    NFAUNode * parseQuote();
 +    /**
 +      Parses <code>{@link pattern pattern}</code>. This function is called
 +      recursively when an or (<code>|</code>) or a group is encountered.
 +      @param inParen Are we currently parsing inside a group
 +      @param inOr Are we currently parsing one side of an or (<code>|</code>)
 +      @param end The end of the current expression
 +      @return The starting node of the NFA constructed from this parse
 +     */
 +    NFAUNode * parse(const bool inParen = 0, const bool inOr = 0, NFAUNode ** end = NULL);
 +  public:
 +    /// We should match regardless of case
 +    const static unsigned long CASE_INSENSITIVE;
 +    /// We are implicitly quoted
 +    const static unsigned long LITERAL;
 +    /// @memo We should treat a <code><b>.</b></code> as [\x00-\x7F]
 +    const static unsigned long DOT_MATCHES_ALL;
 +    /** <code>^</code> and <code>$</code> should anchor to the beginning and
 +        ending of lines, not all input
 +     */
 +    const static unsigned long MULTILINE_MATCHING;
 +    /** When enabled, only instances of <code>\n</codes> are recognized as
 +        line terminators
 +     */
 +    const static unsigned long UNIX_LINE_MODE;
 +    /// The absolute minimum number of matches a quantifier can match (0)
 +    const static int MIN_QMATCH;
 +    /// The absolute maximum number of matches a quantifier can match (0x7FFFFFFF)
 +    const static int MAX_QMATCH;
 +  public:
 +    /**
 +      Call this function to compile a regular expression into a
 +      <code>WCPattern</code> object. Special values can be assigned to
 +      <code>mode</code> when certain non-standard behaviors are expected from
 +      the <code>WCPattern</code> object.
 +      @param pattern The regular expression to compile
 +      @param mode    A bitwise or of flags signalling what special behaviors are
 +                     wanted from this <code>WCPattern</code> object
 +      @return If successful, <code>compile</code> returns a <code>WCPattern</code>
 +              pointer. Upon failure, <code>compile</code> returns
 +              <code>NULL</code>
 +     */
 +    static WCPattern                    * compile        (const bkstring & pattern,
 +                                                        const unsigned long mode = 0);
 +    /**
 +      Dont use this function. This function will compile a pattern, and cache
 +      the result. This will eventually be used as an optimization when people
 +      just want to call static methods using the same pattern over and over
 +      instead of first compiling the pattern and then using the compiled
 +      instance for matching.
 +      @param pattern The regular expression to compile
 +      @param mode    A bitwise or of flags signalling what special behaviors are
 +                     wanted from this <code>WCPattern</code> object
 +      @return If successful, <code>compileAndKeep</code> returns a
 +              <code>WCPattern</code> pointer. Upon failure, <code>compile</code>
 +              returns <code>NULL</code>.
 +     */
 +    static WCPattern                    * compileAndKeep (const bkstring & pattern,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Searches through <code>replace</code> and replaces all substrings matched
 +      by <code>pattern</code> with <code>str</code>. <code>str</code> may
 +      contain backreferences (e.g. <code>\1</code>) to capture groups. A typical
 +      invocation looks like:
 +      <p>
 +      <code>
 +      WCPattern::replace(L"(a+)b(c+)", L"abcccbbabcbabc", L"\\2b\\1");
 +      </code>
 +      <p>
 +      which would replace <code>abcccbbabcbabc</code> with
 +      <code>cccbabbcbabcba</code>.
 +      @param pattern          The regular expression
 +      @param str              The replacement text
 +      @param replacementText  The string in which to perform replacements
 +      @param mode             The special mode requested of the <code>WCPattern</code>
 +                              during the replacement process
 +      @return The text with the replacement string substituted where necessary
 +     */
 +    static bkstring                  replace       (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const bkstring & replacementText,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Splits the specified string over occurrences of the specified pattern.
 +      Empty strings can be optionally ignored. The number of strings returned is
 +      configurable. A typical invocation looks like:
 +      <p>
 +      <code>
 +      bkstring str(strSize, 0);<br>
 +      FILE * fp = fopen(fileName, "r");<br>
 +      fread((char*)str.data(), strSize * 2, 1, fp);<br>
 +      fclose(fp);<br>
 +      <br>
 +      std::vector<bkstring> lines = WCPattern::split(L"[\r\n]+", str, true);<br>
 +      <br>
 +      </code>
 +
 +      @param pattern    The regular expression
 +      @param replace    The string to split
 +      @param keepEmptys Whether or not to keep empty strings
 +      @param limit      The maximum number of splits to make
 +      @param mode       The special mode requested of the <code>WCPattern</code>
 +                        during the split process
 +      @return All substrings of <code>str</code> split across <code>pattern</code>.
 +     */
 +    static std::vector<bkstring>     split         (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const bool keepEmptys = 0,
 +                                                        const unsigned long limit = 0,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Finds all the instances of the specified pattern within the string. You
 +      should be careful to only pass patterns with a minimum length of one. For
 +      example, the pattern <code>a*</code> can be matched by an empty string, so
 +      instead you should pass <code>a+</code> since at least one character must
 +      be matched. A typical invocation of <code>findAll</code> looks like:
 +      <p>
 +      <code>
 +      std::vector<td::string> numbers = WCPattern::findAll(L"\\d+", string);
 +      </code>
 +      <p>
 +
 +      @param pattern  The pattern for which to search
 +      @param str      The string to search
 +      @param mode     The special mode requested of the <code>WCPattern</code>
 +                      during the find process
 +      @return All instances of <code>pattern</code> in <code>str</code>
 +     */
 +    static std::vector<bkstring>     findAll       (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Determines if an entire string matches the specified pattern
 +
 +      @param pattern  The pattern for to match
 +      @param str      The string to match
 +      @param mode     The special mode requested of the <code>WCPattern</code>
 +                      during the replacement process
 +      @return True if <code>str</code> is recognized by <code>pattern</code>
 +     */
 +    static bool                         matches        (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Registers a pattern under a specific name for use in later compilations.
 +      A typical invocation and later use looks like:
 +      <p>
 +      <code>
 +      WCPattern::registerWCPattern(L"ip", L"(?:\\d{1,3}\\.){3}\\d{1,3}");<br>
 +      WCPattern * p1 = WCPattern::compile(L"{ip}:\\d+");<br>
 +      WCPattern * p2 = WCPattern::compile(L"Connection from ({ip}) on port \\d+");<br>
 +      </code>
 +      <p>
 +      Multiple calls to <code>registerWCPattern</code> with the same
 +      <code>name</code> will result in the pattern getting overwritten.
 +
 +      @param name     The name to give to the pattern
 +      @param pattern  The pattern to register
 +      @param mode     Any special flags to use when compiling pattern
 +      @return Success/Failure. Fails only if <code>pattern</code> has invalid
 +              syntax
 +     */
 +    static bool                         registerWCPattern(const bkstring & name,
 +                                                        const bkstring & pattern,
 +                                                        const unsigned long mode = 0);
 +
 +    /**
 +      Clears the pattern registry
 +      */
 +    static void                         unregisterWCPatterns();
 +    /**
 +      Don't use
 +     */
 +    static void                         clearWCPatternCache();
 +
 +    /**
 +      Searches through a string for the <code>n<sup>th</sup></code> match of the
 +      given pattern in the string. Match indeces start at zero, not one.
 +      A typical invocation looks like this:
 +      <p>
 +      <code>
 +      std::pair<bkstring, int> match = WCPattern::findNthMatch(L"\\d{1,3}", L"192.168.1.101:22", 1);<br>
 +      wprintf(L"%s %i\n", match.first.c_str(), match.second);<br>
 +      <br>
 +      Output: 168 4<br>
 +      <br>
 +
 +      @param pattern  The pattern for which to search
 +      @param str      The string to search
 +      @param matchNum Which match to find
 +      @param mode     Any special flags to use during the matching process
 +      @return A string and an integer. The string is the string matched. The
 +              integer is the starting location of the matched string in
 +              <code>str</code>. You can check for success/failure by making sure
 +              that the integer returned is greater than or equal to zero.
 +     */
 +    static std::pair<bkstring, int>  findNthMatch   (const bkstring & pattern,
 +                                                        const bkstring & str,
 +                                                        const int matchNum,
 +                                                        const unsigned long mode = 0);
 +  public:
 +    /**
 +      Deletes all NFA nodes allocated during compilation
 +     */
 +    ~WCPattern();
 +
 +    bkstring               replace       (const bkstring & str,
 +                                              const bkstring & replacementText);
 +    std::vector<bkstring>  split         (const bkstring & str, const bool keepEmptys = 0,
 +                                              const unsigned long limit = 0);
 +    std::vector<bkstring>  findAll       (const bkstring & str);
 +    bool                       matches       (const bkstring & str);
 +    /**
 +      Returns the flags used during compilation of this pattern
 +      @return The flags used during compilation of this pattern
 +     */
 +    unsigned long             getFlags       () const;
 +    /**
 +      Returns the regular expression this pattern represents
 +      @return The regular expression this pattern represents
 +     */
 +    bkstring               getWCPattern     () const;
 +    /**
 +      Creates a matcher object using the specified string and this pattern.
 +      @param str The string to match against
 +      @return A new matcher using object using this pattern and the specified
 +              string
 +     */
 +    WCMatcher                 * createWCMatcher  (const bkstring & str);
 +};
 +
 +class NFAUNode
 +{
 +  friend class WCMatcher;
 +  public:
 +    NFAUNode * next;
 +    NFAUNode();
 +    virtual ~NFAUNode();
 +    virtual void findAllNodes(std::map<NFAUNode*, bool> & soFar);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const = 0;
 +    inline virtual bool isGroupHeadNode()     const { return false; }
 +    inline virtual bool isStartOfInputNode()  const { return false; }
 +};
 +class NFACharUNode : public NFAUNode
 +{
 +  protected:
 +    wchar_t ch;
 +  public:
 +    NFACharUNode(const wchar_t c);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFACICharUNode : public NFAUNode
 +{
 +  protected:
 +    wchar_t ch;
 +  public:
 +    NFACICharUNode(const wchar_t c);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAStartUNode : public NFAUNode
 +{
 +  public:
 +    NFAStartUNode();
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAEndUNode : public NFAUNode
 +{
 +  public:
 +    NFAEndUNode();
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAQuantifierUNode : public NFAUNode
 +{
 +  public:
 +    int min, max;
 +    NFAUNode * inner;
 +    virtual void findAllNodes(std::map<NFAUNode*, bool> & soFar);
 +    NFAQuantifierUNode(WCPattern * pat, NFAUNode * internal,
 +                      const int minMatch = WCPattern::MIN_QMATCH,
 +                      const int maxMatch = WCPattern::MAX_QMATCH);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAGreedyQuantifierUNode : public NFAQuantifierUNode
 +{
 +  public:
 +    NFAGreedyQuantifierUNode(WCPattern * pat, NFAUNode * internal,
 +                            const int minMatch = WCPattern::MIN_QMATCH,
 +                            const int maxMatch = WCPattern::MAX_QMATCH);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +    virtual int matchInternal(const bkstring & str, WCMatcher * matcher, const int curInd, const int soFar) const;
 +};
 +class NFALazyQuantifierUNode : public NFAQuantifierUNode
 +{
 +  public:
 +    NFALazyQuantifierUNode(WCPattern * pat, NFAUNode * internal,
 +                          const int minMatch = WCPattern::MIN_QMATCH,
 +                          const int maxMatch = WCPattern::MAX_QMATCH);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAPossessiveQuantifierUNode : public NFAQuantifierUNode
 +{
 +  public:
 +    NFAPossessiveQuantifierUNode(WCPattern * pat, NFAUNode * internal,
 +                                const int minMatch = WCPattern::MIN_QMATCH,
 +                                const int maxMatch = WCPattern::MAX_QMATCH);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAAcceptUNode : public NFAUNode
 +{
 +  public:
 +    NFAAcceptUNode();
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAClassUNode : public NFAUNode
 +{
 +  public:
 +    bool inv;
 +    std::map<wchar_t, bool> vals;
 +    NFAClassUNode(const bool invert = 0);
 +    NFAClassUNode(const bkstring & clazz, const bool invert);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFACIClassUNode : public NFAUNode
 +{
 +  public:
 +    bool inv;
 +    std::map<wchar_t, bool> vals;
 +    NFACIClassUNode(const bool invert = 0);
 +    NFACIClassUNode(const bkstring & clazz, const bool invert);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFASubStartUNode : public NFAUNode
 +{
 +  public:
 +    NFASubStartUNode();
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAOrUNode : public NFAUNode
 +{
 +  public:
 +    NFAUNode * one;
 +    NFAUNode * two;
 +    NFAOrUNode(NFAUNode * first, NFAUNode * second);
 +    virtual void findAllNodes(std::map<NFAUNode*, bool> & soFar);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAQuoteUNode : public NFAUNode
 +{
 +  public:
 +    bkstring qStr;
 +    NFAQuoteUNode(const bkstring & quoted);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFACIQuoteUNode : public NFAUNode
 +{
 +  public:
 +    bkstring qStr;
 +    NFACIQuoteUNode(const bkstring & quoted);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFALookAheadUNode : public NFAUNode
 +{
 +  public:
 +    bool pos;
 +    NFAUNode * inner;
 +    NFALookAheadUNode(NFAUNode * internal, const bool positive);
 +    virtual void findAllNodes(std::map<NFAUNode*, bool> & soFar);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFALookBehindUNode : public NFAUNode
 +{
 +  public:
 +    bool pos;
 +    bkstring mStr;
 +    NFALookBehindUNode(const bkstring & str, const bool positive);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAStartOfLineUNode : public NFAUNode
 +{
 +  public:
 +    NFAStartOfLineUNode();
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAEndOfLineUNode : public NFAUNode
 +{
 +  public:
 +    NFAEndOfLineUNode();
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAReferenceUNode : public NFAUNode
 +{
 +  public:
 +    int gi;
 +    NFAReferenceUNode(const int groupIndex);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAStartOfInputUNode : public NFAUNode
 +{
 +  public:
 +    NFAStartOfInputUNode();
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +    inline virtual bool isStartOfInputNode()  const { return false; }
 +};
 +class NFAEndOfInputUNode : public NFAUNode
 +{
 +  public:
 +    bool term;
 +    NFAEndOfInputUNode(const bool lookForTerm);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAWordBoundaryUNode : public NFAUNode
 +{
 +  public:
 +    bool pos;
 +    NFAWordBoundaryUNode(const bool positive);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAEndOfMatchUNode : public NFAUNode
 +{
 +  public:
 +    NFAEndOfMatchUNode();
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAGroupHeadUNode : public NFAUNode
 +{
 +  public:
 +    int gi;
 +    NFAGroupHeadUNode(const int groupIndex);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +    inline virtual bool isGroupHeadNode()     const { return false; }
 +};
 +class NFAGroupTailUNode : public NFAUNode
 +{
 +  public:
 +    int gi;
 +    NFAGroupTailUNode(const int groupIndex);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAGroupLoopPrologueUNode : public NFAUNode
 +{
 +  public:
 +    int gi;
 +    NFAGroupLoopPrologueUNode(const int groupIndex);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +class NFAGroupLoopUNode : public NFAUNode
 +{
 +  public:
 +    int gi, min, max, type;
 +    NFAUNode * inner;
 +    NFAGroupLoopUNode(NFAUNode * internal, const int minMatch,
 +                     const int maxMatch, const int groupIndex, const int matchType);
 +    virtual void findAllNodes(std::map<NFAUNode*, bool> & soFar);
 +    virtual int match(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +    int matchGreedy(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +    int matchLazy(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +    int matchPossessive(const bkstring & str, WCMatcher * matcher, const int curInd = 0) const;
 +};
 +
 +#endif
 +
 diff --git a/plugins/SmileyAdd/src/regexp/test.cpp b/plugins/SmileyAdd/src/regexp/test.cpp new file mode 100644 index 0000000000..bb41566f96 --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/test.cpp @@ -0,0 +1,38 @@ +#include <regexp/Matcher.h> +#include <regexp/Pattern.h> +#include <regexp/UnicodeMatcher.h> +#include <regexp/UnicodePattern.h> + +int main() +{ +  { +    Pattern * p = Pattern::compile("^([^:]*)://([^/:]*)((?::[0-9]+)?)/?(([^?]*)((?:\\?.*)?))$"); +    Matcher * m0 = p->createMatcher("http://www.example.com:80/test.php?a=1&a=1&a=1"); + +    if (m0->matches()) +    { +      std::vector<std::string> groups = m0->getGroups(true); +      for (int i = 0; i < (int)groups.size(); ++i) +      { +        printf("m->group(%d): %s\n", i, groups[i].c_str()); +      } +    } +  } +  { +    std::wstring pat = L"^([^:]*)://([^/:]*)((?::[0-9]+)?)/?(([^?]*)((?:\\?.*)?))$"; +    std::wstring mat = L"http://www.example.com:80/test.php?a=1&a=1&a=1"; +    UnicodePattern * p = UnicodePattern::compile(pat); +    UnicodeMatcher * m0 = p->createUnicodeMatcher(mat); + +    if (m0->matches()) +    { +      std::vector<std::wstring> groups = m0->getGroups(true); +      for (int i = 0; i < (int)groups.size(); ++i) +      { +        wprintf(L"m->group(%d): %s\n", i, groups[i].c_str()); +      } +    } +  } + +  return 0; +} | 
