summaryrefslogtreecommitdiff
path: root/plugins/SmileyAdd/src/regexp
diff options
context:
space:
mode:
authorKirill Volinsky <mataes2007@gmail.com>2013-03-09 14:21:01 +0000
committerKirill Volinsky <mataes2007@gmail.com>2013-03-09 14:21:01 +0000
commite674d7f213a219f59cc066ec9268f416180d6031 (patch)
tree73fabd676588fbe872123d9f7ac1607901c2b43d /plugins/SmileyAdd/src/regexp
parentc11aeb37af849927d5cba20275aa118ae127d756 (diff)
added precompiled header
added version info git-svn-id: http://svn.miranda-ng.org/main/trunk@3939 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
Diffstat (limited to 'plugins/SmileyAdd/src/regexp')
-rw-r--r--plugins/SmileyAdd/src/regexp/Matcher.cpp178
-rw-r--r--plugins/SmileyAdd/src/regexp/Matcher.h248
-rw-r--r--plugins/SmileyAdd/src/regexp/Pattern.cpp1709
-rw-r--r--plugins/SmileyAdd/src/regexp/Pattern.h1663
-rw-r--r--plugins/SmileyAdd/src/regexp/WCMatcher.cpp3
-rw-r--r--plugins/SmileyAdd/src/regexp/WCMatcher.h4
-rw-r--r--plugins/SmileyAdd/src/regexp/WCPattern.cpp19
-rw-r--r--plugins/SmileyAdd/src/regexp/WCPattern.h9
-rw-r--r--plugins/SmileyAdd/src/regexp/test.cpp38
9 files changed, 3 insertions, 3868 deletions
diff --git a/plugins/SmileyAdd/src/regexp/Matcher.cpp b/plugins/SmileyAdd/src/regexp/Matcher.cpp
deleted file mode 100644
index ebd1f57850..0000000000
--- a/plugins/SmileyAdd/src/regexp/Matcher.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-#include <Matcher.h>
-#include <Pattern.h>
-
-const int Matcher::MATCH_ENTIRE_STRING = 0x01;
-
-/*
- Detailed documentation is provided in this class' header file
-
- @author Jeffery Stuart
- @since November 2004
- @version 1.07.00
-*/
-
-Matcher::Matcher(Pattern * pattern, const bkstring & text)
-{
- pat = pattern;
- str = &text;
- gc = pattern->groupCount;
- ncgc = -pattern->nonCapGroupCount;
- flags = 0;
- matchedSomething = false;
- starts = new int[gc + ncgc];
- ends = new int[gc + ncgc];
- groups = new int[gc + ncgc];
- groupPos = new int[gc + ncgc];
- groupIndeces = new int[gc + ncgc];
- starts = starts + ncgc;
- ends = ends + ncgc;
- groups = groups + ncgc;
- groupPos = groupPos + ncgc;
- groupIndeces = groupIndeces + ncgc;
- for (int i = 0; i < gc; ++i) starts[i] = ends[i] = 0;
-}
-Matcher::~Matcher()
-{
- delete [] (starts - ncgc);
- delete [] (ends - ncgc);
- delete [] (groups - ncgc);
- delete [] (groupIndeces - ncgc);
- delete [] (groupPos - ncgc);
-}
-void Matcher::clearGroups()
-{
- int i;
- lm = 0;
- for (i = 0; i < gc; ++i) groups[i] = starts[i] = ends[i] = -1;
- for (i = 1; i <= ncgc; ++i) groups[0 - i] = -1;
-}
-bkstring Matcher::replaceWithGroups(const bkstring & str)
-{
- bkstring ret = "";
-
- bkstring t = str;
- while (t.size() > 0)
- {
- if (t[0] == '\\')
- {
- t.erase(0, 1);
- if (t.size() == 0)
- {
- ret += "\\";
- }
- else if (t[0] < '0' || t[0] > '9')
- {
- ret += t[0];
- t.erase(0, 1);
- }
- else
- {
- int gn = 0;
- while (t.size() > 0 && t[0] >= '0' && t[0] <= '9')
- {
- gn = gn * 10 + (t[0] - '0');
- t.erase(0, 1);
- }
- ret += getGroup(gn);
- }
- }
- else
- {
- ret += t[0];
- t.erase(0, 1);
- }
- }
-
- return ret;
-}
-unsigned long Matcher::getFlags() const
-{
- return flags;
-}
-const bkstring& Matcher::getText() const
-{
- return *str;
-}
-
-bool Matcher::matches()
-{
- flags = MATCH_ENTIRE_STRING;
- matchedSomething = false;
- clearGroups();
- lm = 0;
- return pat->head->match(*str, this, 0) == (int)str->size();
-}
-bool Matcher::findFirstMatch()
-{
- starts[0] = 0;
- flags = 0;
- clearGroups();
- start = 0;
- lm = 0;
- ends[0] = pat->head->match(*str, this, 0);
- if (ends[0] >= 0)
- {
- matchedSomething = true;
- return 1;
- }
- return 0;
-}
-bool Matcher::findNextMatch()
-{
- int s = starts[0], e = ends[0];
-
- if (!matchedSomething) return findFirstMatch();
- if (s == e) ++e;
- flags = 0;
- clearGroups();
-
- starts[0] = e;
- if (e >= (int)str->size()) return 0;
- start = e;
- lm = e;
- ends[0] = pat->head->match(*str, this, e);
- return ends[0] >= 0;
-}
-std::vector<bkstring> Matcher::findAll()
-{
- std::vector<bkstring> ret;
- reset();
- while (findNextMatch())
- {
- ret.push_back(getGroup());
- }
- return ret;
-}
-
-void Matcher::reset()
-{
- lm = 0;
- clearGroups();
- matchedSomething = false;
-}
-
-int Matcher::getStartingIndex(const int groupNum) const
-{
- if (groupNum < 0 || groupNum >= gc) return -1;
- return starts[groupNum];
-}
-int Matcher::getEndingIndex(const int groupNum) const
-{
- if (groupNum < 0 || groupNum >= gc) return -1;
- return ends[groupNum];
-}
-bkstring Matcher::getGroup(const int groupNum) const
-{
- if (groupNum < 0 || groupNum >= gc) return "";
- if (starts[groupNum] < 0 || ends[groupNum] < 0) return "";
- return str->substr(starts[groupNum], ends[groupNum] - starts[groupNum]);
-}
-std::vector<bkstring> Matcher::getGroups(const bool includeGroupZero) const
-{
- int i, start = (includeGroupZero ? 0 : 1);
- std::vector<bkstring> ret;
-
- for (i = start; i < gc; ++i) ret.push_back(getGroup(i));
- return ret;
-}
-
diff --git a/plugins/SmileyAdd/src/regexp/Matcher.h b/plugins/SmileyAdd/src/regexp/Matcher.h
deleted file mode 100644
index 621184d8c2..0000000000
--- a/plugins/SmileyAdd/src/regexp/Matcher.h
+++ /dev/null
@@ -1,248 +0,0 @@
-#ifndef __MATCHER_H__
-#define __MATCHER_H__
-
-#include "bkstring.h"
-#include <vector>
-
-class Vector;
-class NFANode;
-class NFAStartNode;
-class NFAEndNode;
-class NFAGroupHeadNode;
-class NFAGroupLoopNode;
-class NFAGroupLoopPrologueNode;
-class NFAGroupTailNode;
-class NFALookBehindNode;
-class NFAStartOfLineNode;
-class NFAEndOfLineNode;
-class NFAEndOfMatchNode;
-class NFAReferenceNode;
-class Pattern;
-
-/**
- A matcher is a non thread-safe object used to scan strings using a given
- {@link Pattern Pattern} object. Using a <code>Matcher</code> is the preferred
- method for scanning strings. Matchers are not thread-safe. Matchers require
- very little dynamic memory, hence one is encouraged to create several
- instances of a matcher when necessary as opposed to sharing a single instance
- of a matcher.
- <p>
- The most common methods needed by the matcher are <code>matches</code>,
- <code>findNextMatch</code>, and <code>getGroup</code>. <code>matches</code>
- and <code>findNextMatch</code> both return success or failure, and further
- details can be gathered from their documentation.
- <p>
- Unlike Java's <code>Matcher</code>, this class allows you to change the string
- you are matching against. This provides a small optimization, since you no
- longer need multiple matchers for a single pattern in a single thread.
- <p>
- This class also provides an extremely handy method for replacing text with
- captured data via the <code>replaceWithGroups</code> method. A typical
- invocation looks like:
- <pre>
- char buf[10000];
- bkstring str = "\\5 (user name \\1) uses \\7 for his/her shell and \\6 is their home directory";
- FILE * fp = fopen("/etc/passwd", "r");
- Pattern::registerPattern("entry", "[^:]+");
- Pattern * p = Pattern::compile("^({entry}):({entry}):({entry}):({entry}):({entry}):({entry}):({entry})$",
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Pattern::MULTILINE_MATCHING | Pattern::UNIX_LINE_MODE);
- Matcher * m = p->createMatcher("");
- while (fgets(buf, 9999, fp))
- {
- &nbsp;&nbsp;m->setString(buf);
- &nbsp;&nbsp;if (m->matches())
- &nbsp;&nbsp;{
- &nbsp;&nbsp;&nbsp;&nbsp;printf("%s\n", m->replaceWithGroups(str).c_str());
- &nbsp;&nbsp;}
- }
- fclose(fp);
-
- </pre>
- Calling any of the following functions before first calling
- <code>matches</code>, <code>findFirstMatch</code>, or
- <code>findNextMatch</code> results in undefined behavior and may cause your
- program to crash.
- <code>
- <ul>
- <li>replaceWithGroups</code>
- <li>getStartingIndex</li>
- <li>getEndingIndex</li>
- <li>getGroup</li>
- <li>getGroups</li>
- </ul>
- </code>
- <p>
- The function <code>findFirstMatch</code> will attempt to find the first match
- in the input string. The same results can be obtained by first calling
- <code>reset</code> followed by <code>findNextMatch</code>.
- <p>
- To eliminate the necessity of looping through a string to find all the
- matching substrings, <code>findAll</code> was created. The function will find
- all matching substrings and return them in a <code>vector</code>. If you need
- to examine specific capture groups within the substrings, then this method
- should not be used.
-
- @author Jeffery Stuart
- @since March 2003, Stable Since November 2004
- @version 1.07.00
- @memo Mutable object used on instances of a Pattern class
- */
-class Matcher
-{
- friend class NFANode;
- friend class NFAStartNode;
- friend class NFAEndNode;
- friend class NFAGroupHeadNode;
- friend class NFAGroupLoopNode;
- friend class NFAGroupLoopPrologueNode;
- friend class NFAGroupTailNode;
- friend class NFALookBehindNode;
- friend class NFAStartOfLineNode;
- friend class NFAEndOfLineNode;
- friend class NFAEndOfMatchNode;
- friend class NFAReferenceNode;
- friend class Pattern;
- private:
- /**
- Creates a new matcher object against <code>text</code> using
- <code>pattern</code>.
-
- @param pattern The pattern with which to search
- @param text The text in which to search
- */
- Matcher(Pattern * pattern, const bkstring & text);
- protected:
- /// The pattern we use to match
- Pattern * pat;
- /// The string in which we are matching
- const bkstring* str;
- /// The starting point of our match
- int start;
- /// An array of the starting positions for each group
- int * starts;
- /// An array of the ending positions for each group
- int * ends;
- /// An array of private data used by NFANodes during matching
- int * groups;
- /// An array of private data used by NFANodes during matching
- int * groupIndeces;
- /// An array of private data used by NFANodes during matching
- int * groupPos;
- /// The ending index of the last match
- int lm;
- /// The number of capturing groups we have
- int gc;
- /// The number of non-capturing groups we havew
- int ncgc;
- /// Whether or not we have matched something (used only by findFirstMatch and findNextMatch)
- int matchedSomething;
- /// The flags with which we were made
- unsigned long flags;
- /// Called by reset to clear the group arrays
- void clearGroups();
- public:
- /// Used internally by match to signify we want the entire string matched
- const static int MATCH_ENTIRE_STRING;
- public:
- /// Cleans up the dynamic memory used by this matcher
- ~Matcher();
- /**
- Replaces the contents of <code>str</code> with the appropriate captured
- text. <code>str</code> should have at least one back reference, otherwise
- this function does nothing.
- @param str The string in which to replace text
- @return A string with all backreferences appropriately replaced
- */
- bkstring replaceWithGroups(const bkstring & str);
- /**
- The flags currently being used by the matcher.
- @return Zero
- */
- unsigned long getFlags() const;
- /**
- The text being searched by the matcher.
- @return the text being searched by the matcher.
- */
- const bkstring& getText() const;
-
- /**
- Scans the string from start to finish for a match. The entire string must
- match for this function to return success. Group variables are
- appropriately set and can be queried after this function returns.
-
- @return Success if and only if the entire string matches the pattern
- */
- bool matches();
- /**
- Scans the string for the first substring matching the pattern. The entire
- string does not necessarily have to match for this function to return
- success. Group variables are appropriately set and can be queried after
- this function returns.
-
- @return Success if any substring matches the specified pattern
- */
- bool findFirstMatch();
- /**
- Scans the string for the next substring matching the pattern. If no calls
- have been made to findFirstMatch of findNextMatch since the last call to
- reset, matches, or setString, then this function's behavior results to
- that of findFirstMatch.
-
- @return Success if another substring can be found that matches the pattern
- */
- bool findNextMatch();
- /**
- Returns a vector of every substring in order which matches the given
- pattern.
-
- @return Every substring in order which matches the given pattern
- */
- std::vector<bkstring> findAll();
- /**
- Resets the internal state of the matcher
- */
- void reset();
- /**
- Same as getText. Left n for backwards compatibilty with old source code
- @return Returns the string that is currently being used for matching
- */
- inline const bkstring& getString() const { return *str; }
- /**
- Sets the string to scan
- @param newStr The string to scan for subsequent matches
- */
- inline void setString(const bkstring & newStr) { str = &newStr; reset(); }
-
- /**
- Returns the starting index of the specified group.
- @param groupNum The group to query
- @return The starting index of the group if it was matched, -1 for an
- invalid group or if the group was not matched
- */
- int getStartingIndex(const int groupNum = 0) const;
- /**
- Returns the ending index of the specified group.
- @param groupNum The group to query
- @return The ending index of the group if it was matched, -1 for an
- invalid group or if the group was not matched
- */
- int getEndingIndex(const int groupNum = 0) const;
- /**
- Returns the specified group. An empty string ("") does not necessarily
- mean the group was not matched. A group such as (a*b?) could be matched by
- a zero length. If an empty string is returned, getStartingIndex can be
- called to determine if the group was actually matched.
- @param groupNum The group to query
- @return The text of the group
- */
- bkstring getGroup(const int groupNum = 0) const;
- /**
- Returns every capture group in a vector
-
- @param includeGroupZero Whether or not include capture group zero
- @return Every capture group
- */
- std::vector<bkstring> getGroups(const bool includeGroupZero = 0) const;
-};
-
-#endif
diff --git a/plugins/SmileyAdd/src/regexp/Pattern.cpp b/plugins/SmileyAdd/src/regexp/Pattern.cpp
deleted file mode 100644
index 4487dddd4b..0000000000
--- a/plugins/SmileyAdd/src/regexp/Pattern.cpp
+++ /dev/null
@@ -1,1709 +0,0 @@
-/**
- From the author (Jeff Stuart)
- "
- Let me start by saying this file is pretty big. If you feel up to it, you can
- try making changes yourself, but you would be better off to just email me at
- stuart@cs.ucdavis.edu if you think there is a bug, or have something useful you
- would like added. This project is very "near and dear" to me, so I am fairly
- quick to make bug fixes. The header files for Pattern and Matcher are fairly
- well documented and the function names are pretty self-explanatory, but if you
- are having any trouble, feel free to email me at stuart@cs.ucdavis.edu.
-
- If you email me, make sure you put something like C++RE in the subject because
- I tend to delete email if I don't recognize the name and the subject is
- something like "I Need Your Help" or "Got A Second" or "I Found It".
- "
- */
-
-/*
- Detailed documentation is provided in this class' header file
-
- @author Jeffery Stuart
- @since November 2004
- @version 1.07.00
-*/
-
-#define to_lower(a) (char)(unsigned)CharLowerA((LPSTR)static_cast<unsigned char>(a))
-#define is_alpha IsCharAlphaA
-
-#ifdef _WIN32
- #pragma warning(push)
- #pragma warning(disable:4996)
- #define str_icmp lstrcmpiA
-#else
- #define str_icmp strcasecmp
-#endif
-
-#include <Pattern.h>
-#include <Matcher.h>
-#include <cstring>
-#include <algorithm>
-
-std::map<bkstring, Pattern *> Pattern::compiledPatterns;
-std::map<bkstring, std::pair<bkstring, unsigned long> > Pattern::registeredPatterns;
-
-const int Pattern::MIN_QMATCH = 0x00000000;
-const int Pattern::MAX_QMATCH = 0x7FFFFFFF;
-
-const unsigned long Pattern::CASE_INSENSITIVE = 0x01;
-const unsigned long Pattern::LITERAL = 0x02;
-const unsigned long Pattern::DOT_MATCHES_ALL = 0x04;
-const unsigned long Pattern::MULTILINE_MATCHING = 0x08;
-const unsigned long Pattern::UNIX_LINE_MODE = 0x10;
-
-Pattern::Pattern(const bkstring & rhs)
-{
- matcher = NULL;
- pattern = rhs;
- curInd = 0;
- groupCount = 0;
- nonCapGroupCount = 0;
- error = 0;
- head = NULL;
-}
-// convenience function in case we want to add any extra debugging output
-void Pattern::raiseError()
-{
-/* switch (pattern[curInd - 1])
- {
- case '*':
- case ')':
- case '+':
- case '?':
- case ']':
- case '}':
- fprintf(stderr, "%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
- fprintf(stderr, "Syntax Error near here. Possible unescaped meta character.\n");
- break;
- default:
- fprintf(stderr, "%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
- fprintf(stderr, "Syntax Error near here. \n");
- break;
- }*/
- error = 1;
-}
-NFANode * Pattern::registerNode(NFANode * node)
-{
- nodes[node] = 1;
- return node;
-}
-
-bkstring Pattern::classUnion (bkstring s1, bkstring s2) const
-{
- char out[300];
- std::sort(s1.begin(), s1.end());
- std::sort(s2.begin(), s2.end());
- *std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0;
- return out;
-}
-bkstring Pattern::classIntersect (bkstring s1, bkstring s2) const
-{
- char out[300];
- std::sort(s1.begin(), s1.end());
- std::sort(s2.begin(), s2.end());
- *std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0;
- return out;
-}
-bkstring Pattern::classNegate (bkstring s1) const
-{
- char out[300];
- int i, ind = 0;
- std::map<char, bool> m;
-
- for (i = 0; i < (int)s1.size(); ++i) m[s1[i]] = 1;
- for (i = 0xFF; i >= 0; --i) if (m.find((char)i) == m.end()) out[ind++] = (char)i;
- out[ind] = 0;
- return bkstring(out, ind);
-}
-bkstring Pattern::classCreateRange(char low, char hi) const
-{
- char out[300];
- int ind = 0;
- while (low != hi) out[ind++] = low++;
- out[ind++] = low;
- return bkstring(out, ind);
-}
-
-int Pattern::getInt(int start, int end)
-{
- int ret = 0;
- for (; start <= end; ++start) ret = ret * 10 + (pattern[start] - '0');
- return ret;
-}
-bool Pattern::quantifyCurly(int & sNum, int & eNum)
-{
- bool good = 1;
- int i, ci = curInd + 1;
- int commaInd = ci, endInd = ci, len = (int)pattern.size();
- sNum = eNum = 0;
-
- while (endInd < len && pattern[endInd ] != '}') ++endInd;
- while (commaInd < endInd && pattern[commaInd] != ',') ++commaInd;
- if (endInd >= len) { raiseError(); return 0; }
- for (i = ci; good && i < endInd; ++i) if (i != commaInd && !isdigit(pattern[i])) good = 0;
- if (!good && commaInd < endInd) { raiseError(); return 0; }
- if (!good) return 0;
- /* so now everything in here is either a comma (and there is at most one comma) or a digit */
- if (commaInd == ci) // {,*}
- {
- if (endInd == commaInd + 1) { sNum = MIN_QMATCH; eNum = MAX_QMATCH; } // {,} = *
- else { sNum = MIN_QMATCH; eNum = getInt(commaInd + 1, endInd - 1); } // {,+}
- }
- else if (commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH; } // {+,}
- else if (commaInd == endInd) { sNum = getInt(ci, endInd - 1); eNum = sNum; } // {+}
- else { sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1); } // {+,+}
- curInd = endInd + 1;
- return 1;
-}
-NFANode * Pattern::quantifyGroup(NFANode * start, NFANode * stop, const int gn)
-{
- NFANode * newNode = NULL;
- int type = 0;
-
- if (curInd < (int)pattern.size())
- {
- char ch = (curInd + 1 >= (int)pattern.size()) ? -1 : pattern[curInd + 1];
- switch (pattern[curInd])
- {
- case '*':
- ++curInd;
- switch (ch)
- {
- case '?': ++curInd; type = 1; break;
- case '+': ++curInd; type = 2; break;
- }
- newNode = registerNode(new NFAGroupLoopPrologueNode(gn));
- newNode->next = registerNode(new NFAGroupLoopNode(start, MIN_QMATCH, MAX_QMATCH, gn, type));
- stop->next = newNode->next;
- return newNode;
- case '?':
- ++curInd;
- switch (ch)
- {
- case '?': ++curInd; type = 1; break;
- case '+': ++curInd; type = 2; break;
- }
- newNode = registerNode(new NFAGroupLoopPrologueNode(gn));
- newNode->next = registerNode(new NFAGroupLoopNode(start, MIN_QMATCH, 1, gn, type));
- stop->next = newNode->next;
- return newNode;
- case '+':
- ++curInd;
- switch (ch)
- {
- case '?': ++curInd; type = 1; break;
- case '+': ++curInd; type = 2; break;
- }
- newNode = registerNode(new NFAGroupLoopPrologueNode(gn));
- newNode->next = registerNode(new NFAGroupLoopNode(start, 1, MAX_QMATCH, gn, type));
- stop->next = newNode->next;
- return newNode;
- case '{':
- {
- int s, e;
- if (quantifyCurly(s, e))
- {
- ch = (curInd < (int)pattern.size()) ? pattern[curInd] : -1;
- switch (ch)
- {
- case '?': ++curInd; type = 1; break;
- case '+': ++curInd; type = 2; break;
- }
- newNode = registerNode(new NFAGroupLoopPrologueNode(gn));
- newNode->next = registerNode(new NFAGroupLoopNode(start, s, e, gn, type));
- stop->next = newNode->next;
- return newNode;
- }
- }
- default:
- break;
- }
- }
- return NULL;
-}
-
-NFANode * Pattern::quantify(NFANode * newNode)
-{
- if (curInd < (int)pattern.size())
- {
- char ch = (curInd + 1 >= (int)pattern.size()) ? -1 : pattern[curInd + 1];
- switch (pattern[curInd])
- {
- case '*':
- ++curInd;
- switch (ch)
- {
- case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode (this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
- case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
- default: newNode = registerNode(new NFAGreedyQuantifierNode (this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
- }
- break;
- case '?':
- ++curInd;
- switch (ch)
- {
- case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode (this, newNode, MIN_QMATCH, 1)); break;
- case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, MIN_QMATCH, 1)); break;
- default: newNode = registerNode(new NFAGreedyQuantifierNode (this, newNode, MIN_QMATCH, 1)); break;
- }
- break;
- case '+':
- ++curInd;
- switch (ch)
- {
- case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode (this, newNode, 1, MAX_QMATCH)); break;
- case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, 1, MAX_QMATCH)); break;
- default: newNode = registerNode(new NFAGreedyQuantifierNode (this, newNode, 1, MAX_QMATCH)); break;
- }
- break;
- case '{':
- {
- int s, e;
- if (quantifyCurly(s, e))
- {
- ch = (curInd < (int)pattern.size()) ? pattern[curInd] : -1;
- switch (ch)
- {
- case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode (this, newNode, s, e)); break;
- case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, s, e)); break;
- default: newNode = registerNode(new NFAGreedyQuantifierNode (this, newNode, s, e)); break;
- }
- }
- }
- break;
- default:
- break;
- }
- }
- return newNode;
-}
-bkstring Pattern::parseClass()
-{
- bkstring t, ret = "";
- char ch, c1, c2;
- bool inv = 0, neg = 0, quo = 0;
-
- if (curInd < (int)pattern.size() && pattern[curInd] == '^')
- {
- ++curInd;
- neg = 1;
- }
- while (curInd < (int)pattern.size() && pattern[curInd] != ']')
- {
- ch = pattern[curInd++];
- if (ch == '[')
- {
- t = parseClass();
- ret = classUnion(ret, t);
- }
- /*else if (ch == '-')
- {
- raiseError();
- curInd = pattern.size();
- }*/
- else if (ch == '&' && curInd < (int)pattern.size() && pattern[curInd] == '&')
- {
- if (pattern[++curInd] != '[')
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else
- {
- ++curInd;
- t = parseClass();
- ret = classIntersect(ret, t);
- }
- }
- else if (ch == '\\')
- {
- t = parseEscape(inv, quo);
- if (quo)
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else if (inv || t.size() > 1) // cant be part of a range (a-z)
- {
- if (inv) t = classNegate(t);
- ret = classUnion(ret, t);
- }
- else if (curInd < (int)pattern.size() && pattern[curInd] == '-') // part of a range (a-z)
- {
- c1 = t[0];
- ++curInd;
- if (curInd >= (int)pattern.size()) raiseError();
- else
- {
- c2 = pattern[curInd++];
- if (c2 == '\\')
- {
- t = parseEscape(inv, quo);
- if (quo)
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else if (inv || t.size() > 1) raiseError();
- else ret = classUnion(ret, classCreateRange(c1, c2));
- }
- else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&')
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else ret = classUnion(ret, classCreateRange(c1, c2));
- }
- }
- else
- {
- ret = classUnion(ret, t);
- }
- }
- else if (curInd < (int)pattern.size() && pattern[curInd] == '-')
- {
- c1 = ch;
- ++curInd;
- if (curInd >= (int)pattern.size()) raiseError();
- else
- {
- c2 = pattern[curInd++];
- if (c2 == '\\')
- {
- t = parseEscape(inv, quo);
- if (quo)
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else if (inv || t.size() > 1) raiseError();
- else ret = classUnion(ret, classCreateRange(c1, c2));
- }
- else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&')
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else
- {
- ret = classUnion(ret, classCreateRange(c1, c2));
- }
- }
- }
- else
- {
- ret += " ";
- ret[ret.size() - 1] = ch;
- }
- }
- if (curInd >= (int)pattern.size() || pattern[curInd] != ']')
- {
- raiseError();
- ret = "";
- }
- else
- {
- ++curInd;
- if (neg) ret = classNegate(ret);
- }
- return ret;
-}
-bkstring Pattern::parsePosix()
-{
- bkstring s7 = pattern.substr(curInd, 7);
- if (s7 == "{Lower}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyz"; }
- if (s7 == "{Upper}") { curInd += 7; return "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; }
- if (s7 == "{Alpha}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; }
- if (s7 == "{Digit}") { curInd += 7; return "0123456789"; }
- if (s7 == "{Alnum}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; }
- if (s7 == "{Punct}") { curInd += 7; return "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
- if (s7 == "{Graph}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
- if (s7 == "{Print}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
- if (s7 == "{Blank}") { curInd += 7; return " \t"; }
- if (s7 == "{Space}") { curInd += 7; return " \t\n\x0B\f\r"; }
- if (s7 == "{Cntrl}")
- {
- bkstring::value_type i;
- bkstring s = " ";
-
- for (i = 0; i < 5; ++i) s += s;
- s += " ";
- for (i = 0; i <= 0x1F; ++i) s[i] = i;
- s[0x20] = 0x7F;
- curInd += 7;
- return s;
- }
- if (s7 == "{ASCII}")
- {
- bkstring s(0x80, ' ');
- for (int i = 0; i <= 0x7f; ++i) s[i] = (bkstring::value_type)i;
- curInd += 7;
- return s;
- }
- if (pattern.substr(curInd, 8) == "{XDigit}") { curInd += 8; return "abcdefABCDEF0123456789"; }
- raiseError();
- return "";
-}
-NFANode * Pattern::parseBackref()
-{
- #define is_dig(x) ((x) >= '0' && (x) <= '9')
- #define to_int(x) ((x) - '0')
- int ci = curInd;
- int oldRef = 0, ref = 0;
-
- while (ci < (int)pattern.size() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount))
- {
- oldRef = ref;
- ref = ref * 10 + to_int(pattern[ci++]);
- }
- if (ci == (int)pattern.size())
- {
- oldRef = ref;
- ++ci;
- }
- if (oldRef < 0 || ci <= curInd)
- {
- raiseError();
- return registerNode(new NFAReferenceNode(-1));
- }
- curInd = ci;
- return registerNode(new NFAReferenceNode(ref));
-
- #undef is_dig
- #undef to_int
-}
-bkstring Pattern::parseOctal()
-{
- #define islowoc(x) ((x) >= '0' && (x) <= '3')
- #define isoc(x) ((x) >= '0' && (x) <= '7')
- #define fromoc(x) ((x) - '0')
- int ci = curInd;
- char ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : -1;
- char ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : -1;
- char ch3 = (ci + 2 < (int)pattern.size()) ? pattern[ci + 2] : -1;
- bkstring s = " ";
-
- if (islowoc(ch1) && isoc(ch2))
- {
- curInd += 2;
- s[0] = fromoc(ch1) * 8 + fromoc(ch2);
- if (isoc(ch3))
- {
- ++curInd;
- s[0] = s[0] * 8 + fromoc(ch3);
- }
- }
- else if (isoc(ch1) && isoc(ch2))
- {
- curInd += 2;
- s[0] = fromoc(ch1) * 8 + fromoc(ch2);
- }
- else raiseError();
-
- return s;
- #undef islowoc
- #undef isoc
- #undef fromoc
-}
-bkstring Pattern::parseHex()
-{
- #define to_low(x) (((x) >= 'A' && (x) <= 'Z') ? ((x) - 'A' + 'a') : (x))
- #define is_dig(x) ((x) >= '0' && (x) <= '9')
- #define is_hex(x) (is_dig(x) || (to_low(x) >= 'a' && to_low(x) <= 'f'))
- #define to_int(x) ((is_dig(x)) ? ((x) - '0') : (to_low(x) - 'a' + 10))
-
- int ci = curInd;
- char ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : -1;
- char ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : -1;
- bkstring s = " ";
-
- if (is_hex(ch1) && is_hex(ch2))
- {
- curInd += 2;
- s[0] = (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F);
- }
-
- return s;
- #undef to_low
- #undef is_dig
- #undef is_hex
- #undef to_int
-}
-bkstring Pattern::parseEscape(bool & inv, bool & quo)
-{
- char ch = pattern[curInd++];
- bkstring classes = "";
-
- if (curInd > (int)pattern.size())
- {
- raiseError();
- return NULL;
- }
-
- quo = 0;
- inv = 0;
- switch (ch)
- {
- case 'p': classes = parsePosix(); break;
- case 'P': classes = "!!"; classes += parsePosix(); break;
- case 'd': classes = "0123456789"; break;
- case 'D': classes = "!!0123456789"; break;
- case 's': classes = " \t\r\n\f"; break;
- case 'S': classes = "!! \t\r\n\f"; break;
- case 'w': classes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break;
- case 'W': classes = "!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break;
- case '0': classes = parseOctal(); break;
- case 'x': classes = parseHex(); break;
-
- case 'Q': quo = 1; break;
- case 't': classes = "\t"; break;
- case 'r': classes = "\r"; break;
- case 'n': classes = "\n"; break;
- case 'f': classes = "\f"; break;
- case 'a': classes = "\a"; break;
- case 'e': classes = "\r"; break;
- default: classes = " "; classes[0] = ch; break;
- }
- if (classes.substr(0, 2) == "!!")
- {
- classes = classes.substr(2);
- inv = 1;
- }
- return classes;
-}
-NFANode * Pattern::parseRegisteredPattern(NFANode ** end)
-{
- int i, j;
- bkstring s;
- NFANode * ret = NULL;
- for (i = curInd; i < (int)pattern.size() && pattern[i] != '}'; ++i) { }
- if (pattern[i] != '}') { raiseError(); return NULL; }
- if (i == curInd + 1) { raiseError(); return NULL; } // {}
- if (
- !(
- (pattern[curInd] >= 'a' && pattern[curInd] <= 'z') ||
- (pattern[curInd] >= 'A' && pattern[curInd] <= 'Z') ||
- (pattern[curInd] == '_')
- )
- )
- {
- raiseError();
- return NULL;
- }
- for (j = curInd; !error && j < i; ++j)
- {
- if (
- !(
- (pattern[j] >= 'a' && pattern[j] <= 'z') ||
- (pattern[j] >= 'A' && pattern[j] <= 'Z') ||
- (pattern[j] >= '0' && pattern[j] <= '9') ||
- (pattern[j] == '_')
- )
- )
- {
- raiseError();
- return NULL;
- }
- }
- s = pattern.substr(curInd, i - curInd);
- if (registeredPatterns.find(s) == registeredPatterns.end()) raiseError();
- else
- {
- unsigned long oflags = flags;
- bkstring op = pattern;
- int ci = i + 1;
-
- pattern = registeredPatterns[s].first;
- curInd = 0;
- flags = registeredPatterns[s].second;
-
- --groupCount;
- ret = parse(0, 0, end);
-
- pattern = op;
- curInd = ci;
- flags = oflags;
- }
- if (error) { *end = ret = NULL; }
- return ret;
-}
-
-// look behind should interpret everything as a literal (except \\) since the
-// pattern must have a concrete length
-NFANode * Pattern::parseBehind(const bool pos, NFANode ** end)
-{
- bkstring t = "";
- while (curInd < (int)pattern.size() && pattern[curInd] != ')')
- {
- char ch = pattern[curInd++];
- t += " ";
- if (ch == '\\')
- {
- if (curInd + 1 >= (int)pattern.size())
- {
- raiseError();
- return *end = registerNode(new NFACharNode(' '));
- }
- ch = pattern[curInd++];
- }
- t[t.size() - 1] = ch;
- }
- if (curInd >= (int)pattern.size() || pattern[curInd] != ')') raiseError();
- else ++curInd;
- return *end = registerNode(new NFALookBehindNode(t, pos));
-}
-NFANode * Pattern::parseQuote()
-{
- bool done = 0;
- bkstring s = "";
-
- while (!done)
- {
- if (curInd >= (int)pattern.size())
- {
- raiseError();
- done = 1;
- }
- else if (pattern.substr(curInd, 2) == "\\E")
- {
- curInd += 2;
- done = 1;
- }
- else if (pattern[curInd] == '\\')
- {
- s += " ";
- s[s.size() - 1] = pattern[++curInd];
- ++curInd;
- }
- else
- {
- s += " ";
- s[s.size() - 1] = pattern[curInd++];
- }
- }
- if ((flags & Pattern::CASE_INSENSITIVE) != 0) return registerNode(new NFACIQuoteNode(s));
- return registerNode(new NFAQuoteNode(s));
-}
-NFANode * Pattern::parse(const bool inParen, const bool inOr, NFANode ** end)
-{
- NFANode * start, * cur, * next = NULL;
- bkstring t;
- int grc = groupCount++;
- bool inv, quo;
- bool ahead = 0, pos = 0, noncap = 0, indep = 0;
- unsigned long oldFlags = flags;
-
- if (inParen)
- {
- if (pattern[curInd] == '?')
- {
- ++curInd;
- --groupCount;
- if (pattern[curInd] == ':') { noncap = 1; ++curInd; grc = --nonCapGroupCount; }
- else if (pattern[curInd] == '=') { ++curInd; ahead = 1; pos = 1; }
- else if (pattern[curInd] == '!') { ++curInd; ahead = 1; pos = 0; }
- else if (pattern.substr(curInd, 2) == "<=") { curInd += 2; return parseBehind(1, end); }
- else if (pattern.substr(curInd, 2) == "<!") { curInd += 2; return parseBehind(0, end); }
- else if (pattern[curInd] == '>') { ++curInd; indep = 1; }
- else
- {
- bool negate = false, done = false;
- while (!done)
- {
- if (curInd >= (int)pattern.size())
- {
- raiseError();
- return NULL;
- }
- else if (negate)
- {
- switch (pattern[curInd])
- {
- case 'i': flags &= ~Pattern::CASE_INSENSITIVE; break;
- case 'd': flags &= ~Pattern::UNIX_LINE_MODE; break;
- case 'm': flags &= ~Pattern::MULTILINE_MATCHING; break;
- case 's': flags &= ~Pattern::DOT_MATCHES_ALL; break;
- case ':': done = true; break;
- case ')':
- ++curInd;
- *end = registerNode(new NFALookBehindNode("", true));
- return *end;
- case '-':
- default: raiseError(); return NULL;
- }
- }
- else
- {
- switch (pattern[curInd])
- {
- case 'i': flags |= Pattern::CASE_INSENSITIVE; break;
- case 'd': flags |= Pattern::UNIX_LINE_MODE; break;
- case 'm': flags |= Pattern::MULTILINE_MATCHING; break;
- case 's': flags |= Pattern::DOT_MATCHES_ALL; break;
- case ':': done = true; break;
- case '-': negate = true; break;
- case ')':
- ++curInd;
- *end = registerNode(new NFALookBehindNode("", true));
- return *end;
- default: raiseError(); return NULL;
- }
- }
- ++curInd;
- }
- noncap = 1;
- grc = --nonCapGroupCount;
- }
- if (noncap) cur = start = registerNode(new NFAGroupHeadNode(grc));
- else cur = start = registerNode(new NFASubStartNode);
- }
- else cur = start = registerNode(new NFAGroupHeadNode(grc));
- }
- else cur = start = registerNode(new NFASubStartNode);
- while (curInd < (int)pattern.size())
- {
- char ch = pattern[curInd++];
-
- next = NULL;
- if (error) return NULL;
- switch (ch)
- {
- case '^':
- if ((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineNode);
- else next = registerNode(new NFAStartOfInputNode);
- break;
- case '$':
- if ((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineNode);
- else next = registerNode(new NFAEndOfInputNode(0));
- break;
- case '|':
- --groupCount;
- cur->next = registerNode(new NFAAcceptNode);
- cur = start = registerNode(new NFAOrNode(start, parse(inParen, 1)));
- break;
- case '\\':
- if (curInd < (int)pattern.size())
- {
- bool eoi = 0;
- switch (pattern[curInd])
- {
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9': next = parseBackref(); break;
- case 'A': ++curInd; next = registerNode(new NFAStartOfInputNode); break;
- case 'B': ++curInd; next = registerNode(new NFAWordBoundaryNode(0)); break;
- case 'b': ++curInd; next = registerNode(new NFAWordBoundaryNode(1)); break;
- case 'G': ++curInd; next = registerNode(new NFAEndOfMatchNode); break;
- case 'Z': eoi = 1;
- case 'z': ++curInd; next = registerNode(new NFAEndOfInputNode(eoi)); break;
- default:
- t = parseEscape(inv, quo);
- if (!quo)
- {
- if (t.size() > 1 || inv)
- {
- if ((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassNode(t, inv));
- else next = registerNode(new NFAClassNode(t, inv));
- }
- else
- {
- next = registerNode(new NFACharNode(t[0]));
- }
- }
- else
- {
- next = parseQuote();
- }
- }
- }
- else raiseError();
- break;
- case '[':
- if ((flags & Pattern::CASE_INSENSITIVE) == 0)
- {
- NFAClassNode * clazz = new NFAClassNode();
- bkstring s = parseClass();
- for (int i = 0; i < (int)s.size(); ++i) clazz->vals[s[i]] = 1;
- next = registerNode(clazz);
- }
- else
- {
- NFACIClassNode * clazz = new NFACIClassNode();
- bkstring s = parseClass();
- for (int i = 0; i < (int)s.size(); ++i) clazz->vals[to_lower(s[i])] = 1;
- next = registerNode(clazz);
- }
- break;
- case '.':
- {
- bool useN = 1, useR = 1;
- NFAClassNode * clazz = new NFAClassNode(1);
- if ((flags & Pattern::UNIX_LINE_MODE) != 0) useR = 0;
- if ((flags & Pattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
- if (useN) clazz->vals['\n'] = 1;
- if (useR) clazz->vals['\r'] = 1;
- next = registerNode(clazz);
- }
- break;
- case '(':
- {
- NFANode * end, * t1, * t2;
- t1 = parse(1, 0, &end);
- if (!t1) raiseError();
- else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL)
- {
- cur->next = t2;
- cur = t2->next;
- }
- else
- {
- cur->next = t1;
- cur = end;
- }
- }
- break;
- case ')':
- if (!inParen) raiseError();
- else if (inOr)
- {
- --curInd;
- cur = cur->next = registerNode(new NFAAcceptNode);
- flags = oldFlags;
- return start;
- }
- else
- {
- if (ahead)
- {
- cur = cur->next = registerNode(new NFAAcceptNode);
- flags = oldFlags;
- return *end = registerNode(new NFALookAheadNode(start, pos));
- }
- else if (indep)
- {
- cur = cur->next = registerNode(new NFAAcceptNode);
- flags = oldFlags;
- return *end = registerNode(new NFAPossessiveQuantifierNode(this, start, 1, 1));
- }
- else // capping or noncapping, it doesnt matter
- {
- *end = cur = cur->next = registerNode(new NFAGroupTailNode(grc));
- next = quantifyGroup(start, *end, grc);
- if (next)
- {
- start = next;
- *end = next->next;
- }
- flags = oldFlags;
- return start;
- }
- }
- break;
- case '{': // registered pattern
- cur->next = parseRegisteredPattern(&next);
- if (cur->next) cur = next;
- break;
- case '*':
- case '+':
- case '?':
-// case '}':
-// case ']':
- raiseError();
- break;
- default:
- if ((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharNode(ch));
- else next = registerNode(new NFACharNode(ch));
- break;
- }
- if (next)
- {
- cur = cur->next = quantify(next);
- }
- }
- if (inParen) raiseError();
- else
- {
- if (inOr) cur = cur->next = registerNode(new NFAAcceptNode);
- if (end) *end = cur;
- }
-
- flags = oldFlags;
- if (error) return NULL;
-
- return start;
-}
-
-Pattern * Pattern::compile(const bkstring & pattern, const unsigned long mode)
-{
- Pattern * p = new Pattern(pattern);
- NFANode * end;
-
- p->flags = mode;
- if ((mode & Pattern::LITERAL) != 0)
- {
- p->head = p->registerNode(new NFAStartNode);
- if ((mode & Pattern::CASE_INSENSITIVE) != 0) p->head->next = p->registerNode(new NFACIQuoteNode(pattern));
- else p->head->next = p->registerNode(new NFAQuoteNode(pattern));
- p->head->next->next = p->registerNode(new NFAEndNode);
- }
- else
- {
- p->head = p->parse(0, 0, &end);
- if (!p->head)
- {
- delete p;
- p = NULL;
- }
- else
- {
- if (!(p->head && p->head->isStartOfInputNode()))
- {
- NFANode * n = p->registerNode(new NFAStartNode);
- n->next = p->head;
- p->head = n;
- }
- end->next = p->registerNode(new NFAEndNode);
- }
- }
- if (p != NULL)
- {
- p->matcher = new Matcher(p, "");
- }
-
- return p;
-}
-
-Pattern * Pattern::compileAndKeep(const bkstring & pattern, const unsigned long mode)
-{
- Pattern * ret = NULL;
- std::map<bkstring, Pattern*>::iterator it = compiledPatterns.find(pattern);
-
- if (it != compiledPatterns.end())
- {
- ret = it->second;
- }
- else
- {
- ret = compile(pattern, mode);
- compiledPatterns[pattern] = ret;
- }
-
- return ret;
-}
-bkstring Pattern::replace(const bkstring & pattern, const bkstring & str,
- const bkstring & replacementText, const unsigned long mode)
-{
- bkstring ret;
- Pattern * p = Pattern::compile(pattern, mode);
- if (p)
- {
- ret = p->replace(str, replacementText);
- delete p;
- }
- return ret;
-}
-
-std::vector<bkstring> Pattern::split(const bkstring & pattern, const bkstring & str, const bool keepEmptys,
- const unsigned long limit, const unsigned long mode)
-{
- std::vector<bkstring> ret;
- Pattern * p = Pattern::compile(pattern, mode);
- if (p)
- {
- ret = p->split(str, keepEmptys, limit);
- delete p;
- }
- return ret;
-}
-
-std::vector<bkstring> Pattern::findAll(const bkstring & pattern, const bkstring & str, const unsigned long mode)
-{
- std::vector<bkstring> ret;
- Pattern * p = Pattern::compile(pattern, mode);
- if (p)
- {
- ret = p->findAll(str);
- delete p;
- }
- return ret;
-}
-
-bool Pattern::matches(const bkstring & pattern, const bkstring & str, const unsigned long mode)
-{
- bool ret = 0;
- Pattern * p = compile(pattern, mode);
-
- if (p)
- {
- ret = p->matches(str);
- delete p;
- }
-
- return ret;
-}
-
-bool Pattern::registerPattern(const bkstring & name, const bkstring & pattern, const unsigned long mode)
-{
- Pattern * p = Pattern::compile(pattern, mode);
- if (!p) return 0;
- Pattern::registeredPatterns[name] = std::make_pair(pattern, mode);
- delete p;
- return 1;
-}
-
-void Pattern::unregisterPatterns()
-{
- registeredPatterns.clear();
-}
-void Pattern::clearPatternCache()
-{
- std::map<bkstring, Pattern*>::iterator it;
- for (it = compiledPatterns.begin(); it != compiledPatterns.end(); ++it)
- {
- delete it->second;
- }
- compiledPatterns.clear();
-}
-
-std::pair<bkstring, int> Pattern::findNthMatch(const bkstring & pattern, const bkstring & str,
- const int matchNum, const unsigned long mode)
-{
- std::pair<bkstring, int> ret;
- Pattern * p = Pattern::compile(pattern, mode);
-
- ret.second = -1;
- if (p)
- {
- int i = -1;
- p->matcher->setString(str);
- while (i < matchNum && p->matcher->findNextMatch()) { ++i; }
- if (i == matchNum && p->matcher->getStartingIndex() >= 0)
- {
- ret.first = p->matcher->getGroup(0);
- ret.second = p->matcher->getStartingIndex();
- }
- delete p;
- }
-
- return ret;
-}
-
-Pattern::~Pattern()
-{
- if (matcher) delete matcher;
- for (std::map<NFANode*, bool>::iterator it = nodes.begin(); it != nodes.end(); ++it)
- {
- delete it->first;
- }
-}
-bkstring Pattern::replace(const bkstring & str, const bkstring & replacementText)
-{
- int li = 0;
- bkstring ret = "";
-
- matcher->setString(str);
- while (matcher->findNextMatch())
- {
- ret += str.substr(li, matcher->getStartingIndex() - li);
- ret += matcher->replaceWithGroups(replacementText);
- li = matcher->getEndingIndex();
- }
- ret += str.substr(li);
-
- return ret;
-}
-std::vector<bkstring> Pattern::split(const bkstring & str, const bool keepEmptys, const unsigned long limit)
-{
- unsigned long lim = (limit == 0 ? MAX_QMATCH : limit);
- int li = 0;
- std::vector<bkstring> ret;
-
- matcher->setString(str);
-
- while (matcher->findNextMatch() && ret.size() < lim)
- {
- if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back("");
- if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys)
- {
- if (li != matcher->getStartingIndex() || keepEmptys)
- {
- ret.push_back(str.substr(li, matcher->getStartingIndex() - li));
- }
- li = matcher->getEndingIndex();
- }
- }
- if (li < (int)str.size()) ret.push_back(str.substr(li));
-
- return ret;
-}
-std::vector<bkstring> Pattern::findAll(const bkstring & str)
-{
- matcher->setString(str);
- return matcher->findAll();
-}
-bool Pattern::matches(const bkstring & str)
-{
- matcher->setString(str);
- return matcher->matches();
-}
-unsigned long Pattern::getFlags() const
-{
- return flags;
-}
-bkstring Pattern::getPattern() const
-{
- return pattern;
-}
-Matcher * Pattern::createMatcher(const bkstring & str)
-{
- return new Matcher(this, str);
-}
-
-// NFANode
-
-NFANode::NFANode() { next = NULL; }
-NFANode::~NFANode() { }
-void NFANode::findAllNodes(std::map<NFANode*, bool> & soFar)
-{
- if (soFar.find(this) == soFar.end()) return;
- soFar[this] = 1;
- if (next) next->findAllNodes(soFar);
-}
-
-// NFACharNode
-
-NFACharNode::NFACharNode(const char c) { ch = c; }
-int NFACharNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd < (int)str.size() && str[curInd] == ch) return next->match(str, matcher, curInd + 1);
- return -1;
-}
-
-// NFACICharNode
-
-NFACICharNode::NFACICharNode(const char c) { ch = to_lower(c); }
-int NFACICharNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd < (int)str.size() && to_lower(str[curInd]) == ch) return next->match(str, matcher, curInd + 1);
- return -1;
-}
-
-// NFAStartNode
-
-NFAStartNode::NFAStartNode() { }
-int NFAStartNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int ret = -1, ci = curInd;
-
- matcher->starts[0] = curInd;
- if ((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) == (unsigned int)Matcher::MATCH_ENTIRE_STRING)
- {
- if (curInd != 0)
- {
- matcher->starts[0] = -1;
- return -1;
- }
- return next->match(str, matcher, 0);
- }
- while ((ret = next->match(str, matcher, ci)) == -1 && ci < (int)str.size())
- {
- matcher->clearGroups();
- matcher->starts[0] = ++ci;
- }
- if (ret < 0) matcher->starts[0] = -1;
- return ret;
-}
-
-// NFAEndNode
-
-NFAEndNode::NFAEndNode() { }
-int NFAEndNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- matcher->ends[0] = curInd;
- if ((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) != 0)
- {
- if (curInd == (int)str.size()) return curInd;
- matcher->ends[0] = -1;
- return -1;
- }
- return curInd;
-}
-
-// NFAQuantifierNode
-
-void NFAQuantifierNode::findAllNodes(std::map<NFANode*, bool> & soFar)
-{
- inner->findAllNodes(soFar);
- NFANode::findAllNodes(soFar);
-}
-NFAQuantifierNode::NFAQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
-{
- inner = internal;
- inner->next = pat->registerNode(new NFAAcceptNode);
- min = (minMatch < Pattern::MIN_QMATCH) ? Pattern::MIN_QMATCH : minMatch;
- max = (maxMatch > Pattern::MAX_QMATCH) ? Pattern::MAX_QMATCH : maxMatch;
-}
-
-int NFAQuantifierNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int i0, i1, i2 = 0;
-
- i0 = i1 = curInd;
- while (i2 < min)
- {
-
- ++i2;
- i1 = inner->match(str, matcher, i0);
- if (i1 <= i0) return i1; // i1 < i0 means i1 is -1
- i0 = i1;
- }
-
- return i1;
-}
-// NFAGreedyQuantifierNode
-
-NFAGreedyQuantifierNode::NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
- : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
-int NFAGreedyQuantifierNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int t = NFAQuantifierNode::match(str, matcher, curInd);
- if (t != -1) return matchInternal(str, matcher, t, min);
- return t;
-}
-int NFAGreedyQuantifierNode::matchInternal(const bkstring & str, Matcher * matcher, const int curInd, const int soFar) const
-{
- if (soFar >= max) return next->match(str, matcher, curInd);
-
- int i, j;
-
- i = inner->match(str, matcher, curInd);
- if (i != -1)
- {
- j = matchInternal(str, matcher, i, soFar + 1);
- if (j != -1) return j;
- }
- return next->match(str, matcher, curInd);
-}
-
-// NFALazyQuantifierNode
-
-NFALazyQuantifierNode::NFALazyQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
- : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
-int NFALazyQuantifierNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int i, j, m = NFAQuantifierNode::match(str, matcher, curInd);
-
- if (m == -1) return -1;
-
- for (i = min; i < max; ++i)
- {
- j = next->match(str, matcher, m);
- if (j == -1)
- {
- j = inner->match(str, matcher, m);
- // if j < m, then j is -1, so we bail.
- // if j == m, then we would just go and call next->match on the same index,
- // but it already failed trying to match right there, so we know we can
- // just bail
- if (j <= m) return -1;
- m = j;
- }
- else return j;
- }
- return next->match(str, matcher, m);
-}
-
-// NFAPossessiveQuantifierNode
-
-NFAPossessiveQuantifierNode::NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
- : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
-int NFAPossessiveQuantifierNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int i, j, m = NFAQuantifierNode::match(str, matcher, curInd);
-
- if (m == -1) return -1;
- for (i = min; i < max; ++i)
- {
- j = inner->match(str, matcher, m);
- if (j <= m) return next->match(str, matcher, m);
- m = j;
- }
- return next->match(str, matcher, m);
-}
-
-// NFAAcceptNode
-
-NFAAcceptNode::NFAAcceptNode() { }
-int NFAAcceptNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (!next) return curInd;
- else return next->match(str, matcher, curInd);
-}
-
-// NFAClassNode
-
-NFAClassNode::NFAClassNode(const bool invert)
-{
- inv = invert;
-}
-NFAClassNode::NFAClassNode(const bkstring & clazz, const bool invert)
-{
- inv = invert;
- for (int i = 0; i < (int)clazz.size(); ++i) vals[clazz[i]] = 1;
-}
-int NFAClassNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd < (int)str.size() && ((vals.find(str[curInd]) != vals.end()) ^ inv))
- {
- return next->match(str, matcher, curInd + 1);
- }
- return -1;
-}
-
-// NFACIClassNode
-
-NFACIClassNode::NFACIClassNode(const bool invert)
-{
- inv = invert;
-}
-NFACIClassNode::NFACIClassNode(const bkstring & clazz, const bool invert)
-{
- inv = invert;
- for (int i = 0; i < (int)clazz.size(); ++i) vals[to_lower(clazz[i])] = 1;
-}
-int NFACIClassNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd < (int)str.size() && ((vals.find(to_lower(str[curInd])) != vals.end()) ^ inv))
- {
- return next->match(str, matcher, curInd + 1);
- }
- return -1;
-}
-
-// NFASubStartNode
-
-NFASubStartNode::NFASubStartNode() { }
-int NFASubStartNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- return next->match(str, matcher, curInd);
-}
-
-// NFAOrNode
-
-NFAOrNode::NFAOrNode(NFANode * first, NFANode * second) : one(first), two(second) { }
-void NFAOrNode::findAllNodes(std::map<NFANode*, bool> & soFar)
-{
- if (one) one->findAllNodes(soFar);
- if (two) two->findAllNodes(soFar);
- NFANode::findAllNodes(soFar);
-}
-int NFAOrNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int ci = one->match(str, matcher, curInd);
-
- if (ci != -1) ci = next->match(str, matcher, ci);
- if (ci != -1) return ci;
- if (ci == -1) ci = two->match(str, matcher, curInd);
- if (ci != -1) ci = next->match(str, matcher, ci);
- return ci;
-}
-
-// NFAQuoteNode
-
-NFAQuoteNode::NFAQuoteNode(const bkstring & quoted) : qStr(quoted) { }
-int NFAQuoteNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd + qStr.size() > str.size()) return -1;
- if (str.substr(curInd, qStr.size()) != qStr) return -1;
- return next->match(str, matcher, curInd + qStr.size());
-}
-
-// NFACIQuoteNode
-
-NFACIQuoteNode::NFACIQuoteNode(const bkstring & quoted) : qStr(quoted) { }
-int NFACIQuoteNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd + qStr.size() > str.size()) return -1;
- if (str_icmp(str.substr(curInd, qStr.size()).c_str(), qStr.c_str())) return -1;
- return next->match(str, matcher, qStr.size());
-}
-
-// NFALookAheadNode
-
-NFALookAheadNode::NFALookAheadNode(NFANode * internal, const bool positive) : NFANode(), pos(positive), inner(internal) { }
-void NFALookAheadNode::findAllNodes(std::map<NFANode*, bool> & soFar)
-{
- if (inner) inner->findAllNodes(soFar);
- NFANode::findAllNodes(soFar);
-}
-int NFALookAheadNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- return ((inner->match(str, matcher, curInd) == -1) ^ pos) ? next->match(str, matcher, curInd) : -1;
-}
-
-// NFALookBehindNode
-
-NFALookBehindNode::NFALookBehindNode(const bkstring & str, const bool positive) : pos(positive), mStr(str) { }
-int NFALookBehindNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (pos)
- {
- if (curInd < (int)mStr.size()) return -1;
- if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return next->match(str, matcher, curInd);
- }
- else
- {
- if (curInd < (int)mStr.size()) return next->match(str, matcher, curInd);
- if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return -1;
- return next->match(str, matcher, curInd);
- }
- return -1;
-}
-
-// NFAStartOfLineNode
-
-NFAStartOfLineNode::NFAStartOfLineNode() { }
-int NFAStartOfLineNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd == 0 || str[curInd - 1] == '\n' || str[curInd - 1] == '\r')
- {
- return next->match(str, matcher, curInd);
- }
- return -1;
-}
-
-// NFAEndOfLineNode
-
-NFAEndOfLineNode::NFAEndOfLineNode() { }
-int NFAEndOfLineNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd >= (int)str.size() || str[curInd] == '\n' || str[curInd] == '\r')
- {
- return next->match(str, matcher, curInd);
- }
- return -1;
-}
-
-// NFAReferenceNode
-
-NFAReferenceNode::NFAReferenceNode(const int groupIndex) : gi(groupIndex) { }
-int NFAReferenceNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int len = matcher->ends[gi] - matcher->starts[gi];
- int ni = -1;
- if (gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0) ni = curInd;
- else if (curInd + len > (int)str.size()) return -1;
- else if (str.substr(curInd, len) != str.substr(matcher->starts[gi], len)) return -1;
- else ni = curInd + len;
-
- return next->match(str, matcher, ni);
-}
-
-// NFAStartOfInputNode
-
-NFAStartOfInputNode::NFAStartOfInputNode() { }
-int NFAStartOfInputNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd == 0) return next->match(str, matcher, curInd);
- return -1;
-}
-
-// NFAEndOfInputNode
-
-NFAEndOfInputNode::NFAEndOfInputNode(const bool lookForTerm) : term(lookForTerm) { }
-int NFAEndOfInputNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int len = (int)str.size();
- if (curInd == len) return next->match(str, matcher, curInd);
- else if (term)
- {
- if (curInd == len - 1 && (str[curInd] == '\r' || str[curInd] == '\n'))
- {
- return next->match(str, matcher, curInd);
- }
- else if (curInd == len - 2 && str.substr(curInd, 2) == "\r\n")
- {
- return next->match(str, matcher, curInd);
- }
- }
- return -1;
-}
-
-// NFAWordBoundaryNode
-
-NFAWordBoundaryNode::NFAWordBoundaryNode(const bool positive) : pos(positive) { }
-int NFAWordBoundaryNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int len = (int)str.size();
-
- char c1 = (curInd - 1 < len && curInd > 0) ? str[curInd - 1] : '\n';
- char c2 = (curInd < len) ? str[curInd ] : '\n';
-
- if (curInd == len) return next->match(str, matcher, curInd);
- bool ok = is_alpha(c1) != is_alpha(c2);
- if (ok && pos) return next->match(str, matcher, curInd);
- return -1;
-}
-
-// NFAEndOfMatchNode
-
-NFAEndOfMatchNode::NFAEndOfMatchNode() { }
-int NFAEndOfMatchNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- if (curInd == matcher->lm) return next->match(str, matcher, curInd);
- return -1;
-}
-
-// NFAGroupHeadNode
-
-NFAGroupHeadNode::NFAGroupHeadNode(const int groupIndex) : gi(groupIndex) { }
-int NFAGroupHeadNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int ret, o = matcher->starts[gi];
-
- matcher->starts[gi] = curInd;
- ret = next->match(str, matcher, curInd);
- if (ret < 0) matcher->starts[gi] = o;
-
- return ret;
-}
-
-// NFAGroupTailNode
-
-NFAGroupTailNode::NFAGroupTailNode(const int groupIndex) : gi(groupIndex) { }
-int NFAGroupTailNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int ret, o = matcher->ends[gi];
-
- matcher->ends[gi] = curInd;
- ret = next->match(str, matcher, curInd);
- if (ret < 0) matcher->ends[gi] = o;
-
- return ret;
-}
-
-// NFAGroupLoopPrologueNode
-
-NFAGroupLoopPrologueNode::NFAGroupLoopPrologueNode(const int groupIndex) : gi(groupIndex) { }
-int NFAGroupLoopPrologueNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi];
-
- matcher->groups[gi] = 0;
- matcher->groupPos[gi] = 0;
- matcher->groupIndeces[gi] = -1;
- ret = next->match(str, matcher, curInd);
- if (ret < 0)
- {
- matcher->groups[gi] = o1;
- matcher->groupPos[gi] = o2;
- matcher->groupIndeces[gi] = o3;
- }
-
- return ret;
-}
-
-// NFAGroupLoopNode
-
-NFAGroupLoopNode::NFAGroupLoopNode(NFANode * internal, const int minMatch, const int maxMatch,
- const int groupIndex, const int matchType)
-{
- inner = internal;
- min = minMatch;
- max = maxMatch;
- gi = groupIndex;
- type = matchType;
-}
-void NFAGroupLoopNode::findAllNodes(std::map<NFANode*, bool> & soFar)
-{
- if (inner) inner->findAllNodes(soFar);
- NFANode::findAllNodes(soFar);
-}
-int NFAGroupLoopNode::match(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- bool b = (curInd > matcher->groupIndeces[gi]);
-
- if (b && matcher->groups[gi] < min)
- {
- ++matcher->groups[gi];
- int o = matcher->groupIndeces[gi];
- matcher->groupIndeces[gi] = curInd;
- int ret = inner->match(str, matcher, curInd);
- if (ret < 0)
- {
- matcher->groupIndeces[gi] = o;
- --matcher->groups[gi];
- }
- return ret;
- }
- else if (!b || matcher->groups[gi] >= max)
- {
- return next->match(str, matcher, curInd);
- }
- else
- {
- switch (type)
- {
- case 0: return matchGreedy(str, matcher, curInd);
- case 1: return matchLazy(str, matcher, curInd);
- case 2: return matchPossessive(str, matcher, curInd);
- }
- }
- return -1;
-}
-int NFAGroupLoopNode::matchGreedy(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int o = matcher->groupIndeces[gi]; // save our info for backtracking
- matcher->groupIndeces[gi] = curInd; // move along
- ++matcher->groups[gi];
- int ret = inner->match(str, matcher, curInd); // match internally
- if (ret < 0)
- { // if we failed, then restore info and match next
- --matcher->groups[gi];
- matcher->groupIndeces[gi] = o;
- ret = next->match(str, matcher, curInd);
- }
- return ret;
-}
-int NFAGroupLoopNode::matchLazy(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int ret = next->match(str, matcher, curInd); // be lazy, just go on
- if (ret < 0)
- {
- int o = matcher->groupIndeces[gi]; // save info for backtracking
- matcher->groupIndeces[gi] = curInd; // advance our position
- ++matcher->groups[gi];
- ret = inner->match(str, matcher, curInd); // match our internal stuff
- if (ret < 0) // if we failed, then restore the info
- {
- --matcher->groups[gi];
- matcher->groupIndeces[gi] = o;
- }
- }
- return ret;
-}
-int NFAGroupLoopNode::matchPossessive(const bkstring & str, Matcher * matcher, const int curInd) const
-{
- int o = matcher->groupIndeces[gi]; // save info for backtracking
- matcher->groupPos[gi] = matcher->groups[gi]; // set a flag stating we have matcher at least this much
- matcher->groupIndeces[gi] = curInd; // move along
- ++matcher->groups[gi];
- int ret = inner->match(str, matcher, curInd); // try and match again
- if (ret < 0)
- { // if we fail, back off, but to an extent
- --matcher->groups[gi];
- matcher->groupIndeces[gi] = o;
- if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd);
- }
- return ret;
-}
-
-#ifdef _WIN32
- #pragma warning(pop)
-#endif
diff --git a/plugins/SmileyAdd/src/regexp/Pattern.h b/plugins/SmileyAdd/src/regexp/Pattern.h
deleted file mode 100644
index bb16ad90fa..0000000000
--- a/plugins/SmileyAdd/src/regexp/Pattern.h
+++ /dev/null
@@ -1,1663 +0,0 @@
-#ifndef __PATTERN_H__
-#define __PATTERN_H__
-
-#ifdef _WIN32
- #pragma warning(disable:4786)
-#endif
-
-#include <vector>
-#include <map>
-
-#include "bkstring.h"
-
-class Matcher;
-class NFANode;
-class NFAQuantifierNode;
-
-/**
- This pattern class is very similar in functionality to Java's
- java.util.regex.Pattern class. The pattern class represents an immutable
- regular expression object. Instead of having a single object contain both the
- regular expression object and the matching object, instead the two objects are
- split apart. The {@link Matcher Matcher} class represents the maching
- object.
-
- The Pattern class works primarily off of "compiled" patterns. A typical
- instantiation of a regular expression looks like:
-
- <pre>
- Pattern * p = Pattern::compile("a*b");
- Matcher * m = p->createMatcher("aaaaaab");
- if (m->matches()) ...
- </pre>
-
- However, if you do not need to use a pattern more than once, it is often times
- okay to use the Pattern's static methods insteads. An example looks like this:
-
- <pre>
- if (Pattern::matches("a*b", "aaaab")) { ... }
- </pre>
-
- This class does not currently support unicode. The unicode update for this
- class is coming soon.
-
- This class is partially immutable. It is completely safe to call createMatcher
- concurrently in different threads, but the other functions (e.g. split) should
- not be called concurrently on the same <code>Pattern</code>.
-
- <table border="0" cellpadding="1" cellspacing="0">
- <tr align="left" bgcolor="#CCCCFF">
- <td>
- <b>Construct</b>
- </td>
- <td>
- <b>Matches</b>
- </th>
- </tr>
- <tr>
- <td colspan="2">
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Characters</b>
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x</i></code>
- </td>
- <td>
- The character <code><i>x</i></code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\\</code>
- </td>
- <td>
- The character <code>\</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\0<i>nn</i></code>
- </td>
- <td>
- The character with octal ASCII value <code><i>nn</i></code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\0<i>nnn</i></code>
- </td>
- <td>
- The character with octal ASCII value <code><i>nnn</i></code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\x<i>hh</i></code>
- </td>
- <td>
- The character with hexadecimal ASCII value <code><i>hh</i></code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\t</code>
- </td>
- <td>
- A tab character
- </td>
- </tr>
- <tr>
- <td>
- <code>\r</code>
- </td>
- <td>
- A carriage return character
- </td>
- </tr>
- <tr>
- <td>
- <code>\n</code>
- </td>
- <td>
- A new-line character
- </td>
- </tr>
- <tr>
- <td colspan="2">
- &nbsp;
- </td>
- </tr>
- <tr>
- <td>
- <b>Character Classes</b>
- </td>
- </tr>
- <tr>
- <td>
- <code>[abc]</code>
- </td>
- <td>
- Either <code>a</code>, <code>b</code>, or <code>c</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>[^abc]</code>
- </td>
- <td>
- Any character but <code>a</code>, <code>b</code>, or <code>c</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>[a-zA-Z]</code>
- </td>
- <td>
- Any character ranging from <code>a</code> thru <code>z</code>, or
- <code>A</code> thru <code>Z</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>[^a-zA-Z]</code>
- </td>
- <td>
- Any character except those ranging from <code>a</code> thru
- <code>z</code>, or <code>A</code> thru <code>Z</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>[a\-z]</code>
- </td>
- <td>
- Either <code>a</code>, <code>-</code>, or <code>z</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>[a-z[A-Z]]</code>
- </td>
- <td>
- Same as <code>[a-zA-Z]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>[a-z&&[g-i]]</code>
- </td>
- <td>
- Any character in the intersection of <code>a-z</code> and
- <code>g-i</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>[a-z&&[^g-i]]</code>
- </td>
- <td>
- Any character in <code>a-z</code> and not in <code>g-i</code>
- </td>
- </tr>
- <tr>
- <td colspan="2">
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Prefefined character classes</b>
- </td>
- </tr>
- <tr>
- <td>
- <code><b>.</b></code>
- </td>
- <td>
- Any character. Multiline matching must be compiled into the pattern for
- <code><b>.</b></code> to match a <code>\r</code> or a <code>\n</code>.
- Even if multiline matching is enabled, <code><b>.</b></code> will not
- match a <code>\r\n</code>, only a <code>\r</code> or a <code>\n</code>.
- </td>
- </tr>
- <tr>
- <td>
- <code>\d</code>
- </td>
- <td>
- <code>[0-9]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\D</code>
- </td>
- <td>
- <code>[^\d]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\s</code>
- </td>
- <td>
- <code>[&nbsp;\t\r\n\x0B]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\S</code>
- </td>
- <td>
- <code>[^\s]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\w</code>
- </td>
- <td>
- <code>[a-zA-Z0-9_]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\W</code>
- </td>
- <td>
- <code>[^\w]</code>
- </td>
- </tr>
- <tr>
- <td colspan="2">
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>POSIX character classes
- </td>
- </tr>
- <tr>
- <td>
- <code>\p{Lower}</code>
- </td>
- <td>
- <code>[a-z]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\p{Upper}</code>
- </td>
- <td>
- <code>[A-Z]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\p{ASCII}</code>
- </td>
- <td>
- <code>[\x00-\x7F]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\p{Alpha}</code>
- </td>
- <td>
- <code>[a-zA-Z]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\p{Digit}</code>
- </td>
- <td>
- <code>[0-9]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\p{Alnum}</code>
- </td>
- <td>
- <code>[\w&&[^_]]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\p{Punct}</code>
- </td>
- <td>
- <code>[!"#$%&'()*+,-./:;&lt;=&gt;?@[\]^_`{|}~]</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\p{XDigit}</code>
- </td>
- <td>
- <code>[a-fA-F0-9]</code>
- </td>
- </tr>
- <tr>
- <td colspan="2">
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Boundary Matches</b>
- </td>
- </tr>
- <tr>
- <td>
- <code>^</code>
- </td>
- <td>
- The beginning of a line. Also matches the beginning of input.
- </td>
- </tr>
- <tr>
- <td>
- <code>$</code>
- </td>
- <td>
- The end of a line. Also matches the end of input.
- </td>
- </tr>
- <tr>
- <td>
- <code>\b</code>
- </td>
- <td>
- A word boundary
- </td>
- </tr>
- <tr>
- <td>
- <code>\B</code>
- </td>
- <td>
- A non word boundary
- </td>
- </tr>
- <tr>
- <td>
- <code>\A</code>
- </td>
- <td>
- The beginning of input
- </td>
- </tr>
- <tr>
- <td>
- <code>\G</code>
- </td>
- <td>
- The end of the previous match. Ensures that a "next" match will only
- happen if it begins with the character immediately following the end of
- the "current" match.
- </td>
- </tr>
- <tr>
- <td>
- <code>\Z</code>
- </td>
- <td>
- The end of input. Will also match if there is a single trailing
- <code>\r\n</code>, a single trailing <code>\r</code>, or a single
- trailing <code>\n</code>.
- </td>
- </tr>
- <tr>
- <td>
- <code>\z</code>
- </td>
- <td>
- The end of input
- </td>
- </tr>
- <tr>
- <td>
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Greedy Quantifiers</b>
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x?</i></code>
- </td>
- <td>
- <i>x</i>, either zero times or one time
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x*</i></code>
- </td>
- <td>
- <i>x</i>, zero or more times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x+</i></code>
- </td>
- <td>
- <i>x</i>, one or more times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n}</i></code>
- </td>
- <td>
- <i>x</i>, exactly n times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n,}</i></code>
- </td>
- <td>
- <i>x</i>, at least <code><i>n</i></code> times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{,m}</i></code>
- </td>
- <td>
- <i>x</i>, at most <code><i>m</i></code> times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n,m}</i></code>
- </td>
- <td>
- <i>x</i>, at least <code><i>n</i></code> times and at most
- <code><i>m</i></code> times
- </td>
- </tr>
- <tr>
- <td>
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Possessive Quantifiers</b>
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x?+</i></code>
- </td>
- <td>
- <i>x</i>, either zero times or one time
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x*+</i></code>
- </td>
- <td>
- <i>x</i>, zero or more times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x++</i></code>
- </td>
- <td>
- <i>x</i>, one or more times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n}+</i></code>
- </td>
- <td>
- <i>x</i>, exactly n times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n,}+</i></code>
- </td>
- <td>
- <i>x</i>, at least <code><i>n</i></code> times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{,m}+</i></code>
- </td>
- <td>
- <i>x</i>, at most <code><i>m</i></code> times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n,m}+</i></code>
- </td>
- <td>
- <i>x</i>, at least <code><i>n</i></code> times and at most
- <code><i>m</i></code> times
- </td>
- </tr>
- <tr>
- <td colspan="2">
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Reluctant Quantifiers</b>
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x??</i></code>
- </td>
- <td>
- <i>x</i>, either zero times or one time
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x*?</i></code>
- </td>
- <td>
- <i>x</i>, zero or more times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x+?</i></code>
- </td>
- <td>
- <i>x</i>, one or more times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n}?</i></code>
- </td>
- <td>
- <i>x</i>, exactly n times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n,}?</i></code>
- </td>
- <td>
- <i>x</i>, at least <code><i>n</i></code> times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{,m}?</i></code>
- </td>
- <td>
- <i>x</i>, at most <code><i>m</i></code> times
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x{n,m}?</i></code>
- </td>
- <td>
- <i>x</i>, at least <code><i>n</i></code> times and at most
- <code><i>m</i></code> times
- </td>
- </tr>
- <tr>
- <td>
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Operators</b>
- </td>
- </tr>
- <tr>
- <td>
- <code><i>xy</i></code>
- </td>
- <td>
- <code><i>x</i></code> then <code><i>y</i></code>
- </td>
- </tr>
- <tr>
- <td>
- <code><i>x</i></code>|<code><i>y</i></code>
- </td>
- <td>
- <code><i>x</i></code> or <code><i>y</i></code>
- </td>
- </tr>
- <tr>
- <td>
- <code>(<i>x</i>)</code>
- </td>
- <td>
- <code><i>x</i></code> as a capturing group
- </td>
- </tr>
- <tr>
- <td colspan="2">
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Quoting</b>
- </td>
- </tr>
- <tr>
- <td>
- <code>\Q</code>
- </td>
- <td>
- Nothing, but treat every character (including \s) literally until a
- matching <code>\E</code>
- </td>
- </tr>
- <tr>
- <td>
- <code>\E</code>
- </td>
- <td>
- Nothing, but ends its matching <code>\Q</code>
- </td>
- </tr>
- <tr>
- <td>
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Special Constructs</b>
- </td>
- </tr>
- <tr>
- <td>
- <code>(?:<i>x</i>)</code>
- </td>
- <td>
- <code><i>x</i></code>, but not as a capturing group
- </td>
- </tr>
- <tr>
- <td>
- <code>(?=<i>x</i>)</code>
- </td>
- <td>
- <code><i>x</i></code>, via positive lookahead. This means that the
- expression will match only if it is trailed by <code><i>x</i></code>.
- It will not "eat" any of the characters matched by
- <code><i>x</i></code>.
- </td>
- </tr>
- <tr>
- <td>
- <code>(?!<i>x</i>)</code>
- </td>
- <td>
- <code><i>x</i></code>, via negative lookahead. This means that the
- expression will match only if it is not trailed by
- <code><i>x</i></code>. It will not "eat" any of the characters
- matched by <code><i>x</i></code>.
- </td>
- </tr>
- <tr>
- <td>
- <code>(?<=<i>x</i>)</code>
- </td>
- <td>
- <code><i>x</i></code>, via positive lookbehind. <code><i>x</i></code>
- cannot contain any quantifiers.
- </td>
- </tr>
- <tr>
- <td>
- <code>(?<!<i>x</i>)</code>
- </td>
- <td>
- <code><i>x</i></code>, via negative lookbehind. <code><i>x</i></code>
- cannot contain any quantifiers.
- </td>
- </tr>
- <tr>
- <td>
- <code>(?><i>x</i>)</code>
- </td>
- <td>
- <code><i>x</i>{1}+</code>
- </td>
- </tr>
- <tr>
- <td colspan="2">
- &nbsp;
- </td>
- </tr>
- <tr>
- <td colspan="2">
- <b>Registered Expression Matching</b>
- </td>
- </tr>
- <tr>
- <td>
- <code>{<i>x</i>}</code>
- </td>
- <td>
- The registered pattern <code><i>x</i></code>
- </td>
- </tr>
- </table>
-
- <hr>
-
- <i>Begin Text Extracted And Modified From java.util.regex.Pattern documentation</i>
-
- <h4> Backslashes, escapes, and quoting </h4>
-
- <p> The backslash character (<tt>'\'</tt>) serves to introduce escaped
- constructs, as defined in the table above, as well as to quote characters
- that otherwise would be interpreted as unescaped constructs. Thus the
- expression <tt>\\</tt> matches a single backslash and <tt>\{</tt> matches a
- left brace.
-
- <p> It is an error to use a backslash prior to any alphabetic character that
- does not denote an escaped construct; these are reserved for future
- extensions to the regular-expression language. A backslash may be used
- prior to a non-alphabetic character regardless of whether that character is
- part of an unescaped construct.
-
- <p>It is necessary to double backslashes in string literals that represent
- regular expressions to protect them from interpretation by a compiler. The
- string literal <tt>"&#92;b"</tt>, for example, matches a single backspace
- character when interpreted as a regular expression, while
- <tt>"&#92;&#92;b"</tt> matches a word boundary. The string litera
- <tt>"&#92;(hello&#92;)"</tt> is illegal and leads to a compile-time error;
- in order to match the string <tt>(hello)</tt> the string literal
- <tt>"&#92;&#92;(hello&#92;&#92;)"</tt> must be used.
-
- <h4> Character Classes </h4>
-
- <p> Character classes may appear within other character classes, and
- may be composed by the union operator (implicit) and the intersection
- operator (<tt>&amp;&amp;</tt>).
- The union operator denotes a class that contains every character that is
- in at least one of its operand classes. The intersection operator
- denotes a class that contains every character that is in both of its
- operand classes.
-
- <p> The precedence of character-class operators is as follows, from
- highest to lowest:
-
- <blockquote><table border="0" cellpadding="1" cellspacing="0"
- summary="Precedence of character class operators.">
-
- <tr><th>1&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td>Literal escape&nbsp;&nbsp;&nbsp;&nbsp;</td>
- <td><tt>\x</tt></td></tr>
- <tr><th>2&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td>Range</td>
- <td><tt>a-z</tt></td></tr>
- <tr><th>3&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td>Grouping</td>
- <td><tt>[...]</tt></td></tr>
- <tr><th>4&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td>Intersection</td>
- <td><tt>[a-z&&[aeiou]]</tt></td></tr>
- <tr><th>5&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td>Union</td>
- <td><tt>[a-e][i-u]<tt></td></tr>
- </table></blockquote>
-
- <p> Note that a different set of metacharacters are in effect inside
- a character class than outside a character class. For instance, the
- regular expression <tt>.</tt> loses its special meaning inside a
- character class, while the expression <tt>-</tt> becomes a range
- forming metacharacter.
-
- <a name="lt">
-
- <a name="cg">
- <h4> Groups and capturing </h4>
-
- <p> Capturing groups are numbered by counting their opening parentheses from
- left to right. In the expression <tt>((A)(B(C)))</tt>, for example, there
- are four such groups: </p>
-
- <blockquote><table cellpadding=1 cellspacing=0 summary="Capturing group numberings">
-
- <tr><th>1&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td><tt>((A)(B(C)))</tt></td></tr>
- <tr><th>2&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td><tt>(A)</tt></td></tr>
- <tr><th>3&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td><tt>(B(C))</tt></td></tr>
-
- <tr><th>4&nbsp;&nbsp;&nbsp;&nbsp;</th>
- <td><tt>(C)</tt></td></tr>
- </table></blockquote>
-
- <p> Group zero always stands for the entire expression.
-
- <p> Capturing groups are so named because, during a match, each subsequence
- of the input sequence that matches such a group is saved. The captured
- subsequence may be used later in the expression, via a back reference, and
- may also be retrieved from the matcher once the match operation is complete.
-
- <p> The captured input associated with a group is always the subsequence
- that the group most recently matched. If a group is evaluated a second time
- because of quantification then its previously-captured value, if any, will
- be retained if the second evaluation fails. Matching the string
- <tt>"aba"</tt> against the expression <tt>(a(b)?)+</tt>, for example, leaves
- group two set to <tt>"b"</tt>. All captured input is discarded at the
- beginning of each match.
-
- <p> Groups beginning with <tt>(?</tt> are pure, <i>non-capturing</i> groups
- that do not capture text and do not count towards the group total.
-
-
- <h4> Unicode support </h4>
-
- <p> Coming Soon.
-
- <h4> Comparison to Perl 5 </h4>
-
- <p>The <code>Pattern</code> engine performs traditional NFA-based matching
- with ordered alternation as occurs in Perl 5.
-
- <p> Perl constructs not supported by this class: </p>
-
- <ul>
-
- <li><p> The conditional constructs <tt>(?{</tt><i>X</i><tt>})</tt> and
- <tt>(?(</tt><i>condition</i><tt>)</tt><i>X</i><tt>|</tt><i>Y</i><tt>)</tt>,
- </p></li>
-
- <li><p> The embedded code constructs <tt>(?{</tt><i>code</i><tt>})</tt>
- and <tt>(??{</tt><i>code</i><tt>})</tt>,</p></li>
-
- <li><p> The embedded comment syntax <tt>(?#comment)</tt>, and </p></li>
-
- <li><p> The preprocessing operations <tt>\l</tt> <tt>&#92;u</tt>,
- <tt>\L</tt>, and <tt>\U</tt>. </p></li>
-
- <li><p> Embedded flags</p></li>
-
- </ul>
-
- <p> Constructs supported by this class but not by Perl: </p>
-
- <ul>
-
- <li><p> Possessive quantifiers, which greedily match as much as they can
- and do not back off, even when doing so would allow the overall match to
- succeed. </p></li>
-
- <li><p> Character-class union and intersection as described
- above.</p></li>
-
- </ul>
-
- <p> Notable differences from Perl: </p>
-
- <ul>
-
- <li><p> In Perl, <tt>\1</tt> through <tt>\9</tt> are always interpreted
- as back references; a backslash-escaped number greater than <tt>9</tt> is
- treated as a back reference if at least that many subexpressions exist,
- otherwise it is interpreted, if possible, as an octal escape. In this
- class octal escapes must always begin with a zero. In this class,
- <tt>\1</tt> through <tt>\9</tt> are always interpreted as back
- references, and a larger number is accepted as a back reference if at
- least that many subexpressions exist at that point in the regular
- expression, otherwise the parser will drop digits until the number is
- smaller or equal to the existing number of groups or it is one digit.
- </p></li>
-
- <li><p> Perl uses the <tt>g</tt> flag to request a match that resumes
- where the last match left off. This functionality is provided implicitly
- by the <CODE>Matcher</CODE> class: Repeated invocations of the
- <code>find</code> method will resume where the last match left off,
- unless the matcher is reset. </p></li>
-
- <li><p> Perl is forgiving about malformed matching constructs, as in the
- expression <tt>*a</tt>, as well as dangling brackets, as in the
- expression <tt>abc]</tt>, and treats them as literals. This
- class also strict and will not compile a pattern when dangling characters
- are encountered.</p></li>
-
- </ul>
-
-
- <p> For a more precise description of the behavior of regular expression
- constructs, please see <a href="http://www.oreilly.com/catalog/regex2/">
- <i>Mastering Regular Expressions, 2nd Edition</i>, Jeffrey E. F. Friedl,
- O'Reilly and Associates, 2002.</a>
- </p>
- <P>
-
- <i>End Text Extracted And Modified From java.util.regex.Pattern documentation</i>
-
- <hr>
-
- @author Jeffery Stuart
- @since March 2003, Stable Since November 2004
- @version 1.07.00
- @memo A class used to represent "PERL 5"-ish regular expressions
- */
-class Pattern
-{
- friend class Matcher;
- friend class NFANode;
- friend class NFAQuantifierNode;
- private:
- /**
- This constructor should not be called directly. Those wishing to use the
- Pattern class should instead use the {@link compile compile} method.
-
- @param rhs The pattern to compile
- @memo Creates a new pattern from the regular expression in <code>rhs</code>.
- */
- Pattern(const bkstring & rhs);
- protected:
- /**
- This currently is not used, so don't try to do anything with it.
- @memo Holds all the compiled patterns for quick access.
- */
- static std::map<bkstring, Pattern *> compiledPatterns;
- /**
- Holds all of the registered patterns as strings. Due to certain problems
- with compilation of patterns, especially with capturing groups, this seemed
- to be the best way to do it.
- */
- static std::map<bkstring, std::pair<bkstring, unsigned long> > registeredPatterns;
- protected:
- /**
- Holds all the NFA nodes used. This makes deletion of a pattern, as well as
- clean-up from an unsuccessful compile much easier and faster.
- */
- std::map<NFANode*, bool> nodes;
- /**
- Used when methods like split are called. The matcher class uses a lot of
- dynamic memeory, so having an instance increases speedup of certain
- operations.
- */
- Matcher * matcher;
- /**
- The front node of the NFA.
- */
- NFANode * head;
- /**
- The actual regular expression we rerpesent
- */
- bkstring pattern;
- /**
- Flag used during compilation. Once the pattern is successfully compiled,
- <code>error</code> is no longer used.
- */
- bool error;
- /**
- Used during compilation to keep track of the current index into
- <code>{@link pattern pattern}<code>. Once the pattern is successfully
- compiled, <code>error</code> is no longer used.
- */
- int curInd;
- /**
- The number of capture groups this contains.
- */
- int groupCount;
- /**
- The number of non-capture groups this contains.
- */
- int nonCapGroupCount;
- /**
- The flags specified when this was compiled.
- */
- unsigned long flags;
- protected:
- /**
- Raises an error during compilation. Compilation will cease at that point
- and compile will return <code>NULL</code>.
- */
- void raiseError();
- /**
- Convenience function for registering a node in <code>nodes</code>.
- @param node The node to register
- @return The registered node
- */
- NFANode * registerNode(NFANode * node);
-
- /**
- Calculates the union of two strings. This function will first sort the
- strings and then use a simple selection algorithm to find the union.
- @param s1 The first "class" to union
- @param s2 The second "class" to union
- @return A new string containing all unique characters. Each character
- must have appeared in one or both of <code>s1</code> and
- <code>s2</code>.
- */
- bkstring classUnion (bkstring s1, bkstring s2) const;
- /**
- Calculates the intersection of two strings. This function will first sort
- the strings and then use a simple selection algorithm to find the
- intersection.
- @param s1 The first "class" to intersect
- @param s2 The second "class" to intersect
- @return A new string containing all unique characters. Each character
- must have appeared both <code>s1</code> and <code>s2</code>.
- */
- bkstring classIntersect (bkstring s1, bkstring s2) const;
- /**
- Calculates the negation of a string. The negation is the set of all
- characters between <code>\x00</code> and <code>\xFF</code> not
- contained in <code>s1</code>.
- @param s1 The "class" to be negated.
- @param s2 The second "class" to intersect
- @return A new string containing all unique characters. Each character
- must have appeared both <code>s1</code> and <code>s2</code>.
- */
- bkstring classNegate (bkstring s1) const;
- /**
- Creates a new "class" representing the range from <code>low</code> thru
- <code>hi</code>. This function will wrap if <code>low</code> &gt;
- <code>hi</code>. This is a feature, not a buf. Sometimes it is useful
- to be able to say [\x70-\x10] instead of [\x70-\x7F\x00-\x10].
- @param low The beginning character
- @param hi The ending character
- @return A new string containing all the characters from low thru hi.
- */
- bkstring classCreateRange(char low, char hi) const;
-
- /**
- Extracts a decimal number from the substring of member-variable
- <code>{@link pattern pattern}<code> starting at <code>start</code> and
- ending at <code>end</code>.
- @param start The starting index in <code>{@link pattern pattern}<code>
- @param end The last index in <code>{@link pattern pattern}<code>
- @return The decimal number in <code>{@link pattern pattern}<code>
- */
- int getInt(int start, int end);
- /**
- Parses a <code>{n,m}</code> string out of the member-variable
- <code>{@link pattern pattern}<code> stores the result in <code>sNum</code>
- and <code>eNum</code>.
- @param sNum Output parameter. The minimum number of matches required
- by the curly quantifier are stored here.
- @param eNum Output parameter. The maximum number of matches allowed
- by the curly quantifier are stored here.
- @return Success/Failure. Fails when the curly does not have the proper
- syntax
- */
- bool quantifyCurly(int & sNum, int & eNum);
- /**
- Tries to quantify the currently parsed group. If the group being parsed
- is indeed quantified in the member-variable
- <code>{@link pattern pattern}<code>, then the NFA is modified accordingly.
- @param start The starting node of the current group being parsed
- @param stop The ending node of the current group being parsed
- @param gn The group number of the current group being parsed
- @return The node representing the starting node of the group. If the
- group becomes quantified, then this node is not necessarily
- a GroupHead node.
- */
- NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn);
-
- /**
- Tries to quantify the last parsed expression. If the character was indeed
- quantified, then the NFA is modified accordingly.
- @param newNode The recently created expression node
- @return The node representing the last parsed expression. If the
- expression was quantified, <code>return value != newNode</code>
- */
- NFANode * quantify(NFANode * newNode);
- /**
- Parses the current class being examined in
- <code>{@link pattern pattern}</code>.
- @return A string of unique characters contained in the current class being
- parsed
- */
- bkstring parseClass();
- /**
- Parses the current POSIX class being examined in
- <code>{@link pattern pattern}</code>.
- @return A string of unique characters representing the POSIX class being
- parsed
- */
- bkstring parsePosix();
- /**
- Returns a string containing the octal character being parsed
- @return The string contained the octal value being parsed
- */
- bkstring parseOctal();
- /**
- Returns a string containing the hex character being parsed
- @return The string contained the hex value being parsed
- */
- bkstring parseHex();
- /**
- Returns a new node representing the back reference being parsed
- @return The new node representing the back reference being parsed
- */
- NFANode * parseBackref();
- /**
- Parses the escape sequence currently being examined. Determines if the
- escape sequence is a class, a single character, or the beginning of a
- quotation sequence.
- @param inv Output parameter. Whether or not to invert the returned class
- @param quo Output parameter. Whether or not this sequence starts a
- quotation.
- @return The characters represented by the class
- */
- bkstring parseEscape(bool & inv, bool & quo);
- /**
- Parses a supposed registered pattern currently under compilation. If the
- sequence of characters does point to a registered pattern, then the
- registered pattern is appended to <code>*end<code>. The registered pattern
- is parsed with the current compilation flags.
- @param end The ending node of the thus-far compiled pattern
- @return The new end node of the current pattern
- */
- NFANode * parseRegisteredPattern(NFANode ** end);
- /**
- Parses a lookbehind expression. Appends the necessary nodes
- <code>*end</code>.
- @param pos Positive or negative look behind
- @param end The ending node of the current pattern
- @return The new end node of the current pattern
- */
- NFANode * parseBehind(const bool pos, NFANode ** end);
- /**
- Parses the current expression and tacks on nodes until a \E is found.
- @return The end of the current pattern
- */
- NFANode * parseQuote();
- /**
- Parses <code>{@link pattern pattern}</code>. This function is called
- recursively when an or (<code>|</code>) or a group is encountered.
- @param inParen Are we currently parsing inside a group
- @param inOr Are we currently parsing one side of an or (<code>|</code>)
- @param end The end of the current expression
- @return The starting node of the NFA constructed from this parse
- */
- NFANode * parse(const bool inParen = 0, const bool inOr = 0, NFANode ** end = NULL);
- public:
- /// We should match regardless of case
- const static unsigned long CASE_INSENSITIVE;
- /// We are implicitly quoted
- const static unsigned long LITERAL;
- /// @memo We should treat a <code><b>.</b></code> as [\x00-\x7F]
- const static unsigned long DOT_MATCHES_ALL;
- /** <code>^</code> and <code>$</code> should anchor to the beginning and
- ending of lines, not all input
- */
- const static unsigned long MULTILINE_MATCHING;
- /** When enabled, only instances of <code>\n</codes> are recognized as
- line terminators
- */
- const static unsigned long UNIX_LINE_MODE;
- /// The absolute minimum number of matches a quantifier can match (0)
- const static int MIN_QMATCH;
- /// The absolute maximum number of matches a quantifier can match (0x7FFFFFFF)
- const static int MAX_QMATCH;
- public:
- /**
- Call this function to compile a regular expression into a
- <code>Pattern</code> object. Special values can be assigned to
- <code>mode</code> when certain non-standard behaviors are expected from
- the <code>Pattern</code> object.
- @param pattern The regular expression to compile
- @param mode A bitwise or of flags signalling what special behaviors are
- wanted from this <code>Pattern</code> object
- @return If successful, <code>compile</code> returns a <code>Pattern</code>
- pointer. Upon failure, <code>compile</code> returns
- <code>NULL</code>
- */
- static Pattern * compile (const bkstring & pattern,
- const unsigned long mode = 0);
- /**
- Dont use this function. This function will compile a pattern, and cache
- the result. This will eventually be used as an optimization when people
- just want to call static methods using the same pattern over and over
- instead of first compiling the pattern and then using the compiled
- instance for matching.
- @param pattern The regular expression to compile
- @param mode A bitwise or of flags signalling what special behaviors are
- wanted from this <code>Pattern</code> object
- @return If successful, <code>compileAndKeep</code> returns a
- <code>Pattern</code> pointer. Upon failure, <code>compile</code>
- returns <code>NULL</code>.
- */
- static Pattern * compileAndKeep (const bkstring & pattern,
- const unsigned long mode = 0);
-
- /**
- Searches through <code>replace</code> and replaces all substrings matched
- by <code>pattern</code> with <code>str</code>. <code>str</code> may
- contain backreferences (e.g. <code>\1</code>) to capture groups. A typical
- invocation looks like:
- <p>
- <code>
- Pattern::replace("(a+)b(c+)", "abcccbbabcbabc", "\\2b\\1");
- </code>
- <p>
- which would replace <code>abcccbbabcbabc</code> with
- <code>cccbabbcbabcba</code>.
- @param pattern The regular expression
- @param str The replacement text
- @param replacementText The string in which to perform replacements
- @param mode The special mode requested of the <code>Pattern</code>
- during the replacement process
- @return The text with the replacement string substituted where necessary
- */
- static bkstring replace (const bkstring & pattern,
- const bkstring & str,
- const bkstring & replacementText,
- const unsigned long mode = 0);
-
- /**
- Splits the specified string over occurrences of the specified pattern.
- Empty strings can be optionally ignored. The number of strings returned is
- configurable. A typical invocation looks like:
- <p>
- <code>
- bkstring str(strSize, '\0');<br>
- FILE * fp = fopen(fileName, "r");<br>
- fread((char*)str.data(), strSize, 1, fp);<br>
- fclose(fp);<br>
- <br>
- std::vector&lt;bkstring&gt; lines = Pattern::split("[\r\n]+", str, true);<br>
- <br>
- </code>
-
- @param pattern The regular expression
- @param replace The string to split
- @param keepEmptys Whether or not to keep empty strings
- @param limit The maximum number of splits to make
- @param mode The special mode requested of the <code>Pattern</code>
- during the split process
- @return All substrings of <code>str</code> split across <code>pattern</code>.
- */
- static std::vector<bkstring> split (const bkstring & pattern,
- const bkstring & str,
- const bool keepEmptys = 0,
- const unsigned long limit = 0,
- const unsigned long mode = 0);
-
- /**
- Finds all the instances of the specified pattern within the string. You
- should be careful to only pass patterns with a minimum length of one. For
- example, the pattern <code>a*</code> can be matched by an empty string, so
- instead you should pass <code>a+</code> since at least one character must
- be matched. A typical invocation of <code>findAll</code> looks like:
- <p>
- <code>
- std::vector&lt;td::string&gt; numbers = Pattern::findAll("\\d+", string);
- </code>
- <p>
-
- @param pattern The pattern for which to search
- @param str The string to search
- @param mode The special mode requested of the <code>Pattern</code>
- during the find process
- @return All instances of <code>pattern</code> in <code>str</code>
- */
- static std::vector<bkstring> findAll (const bkstring & pattern,
- const bkstring & str,
- const unsigned long mode = 0);
-
- /**
- Determines if an entire string matches the specified pattern
-
- @param pattern The pattern for to match
- @param str The string to match
- @param mode The special mode requested of the <code>Pattern</code>
- during the replacement process
- @return True if <code>str</code> is recognized by <code>pattern</code>
- */
- static bool matches (const bkstring & pattern,
- const bkstring & str,
- const unsigned long mode = 0);
-
- /**
- Registers a pattern under a specific name for use in later compilations.
- A typical invocation and later use looks like:
- <p>
- <code>
- Pattern::registerPattern("ip", "(?:\\d{1,3}\\.){3}\\d{1,3}");<br>
- Pattern * p1 = Pattern::compile("{ip}:\\d+");<br>
- Pattern * p2 = Pattern::compile("Connection from ({ip}) on port \\d+");<br>
- </code>
- <p>
- Multiple calls to <code>registerPattern</code> with the same
- <code>name</code> will result in the pattern getting overwritten.
-
- @param name The name to give to the pattern
- @param pattern The pattern to register
- @param mode Any special flags to use when compiling pattern
- @return Success/Failure. Fails only if <code>pattern</code> has invalid
- syntax
- */
- static bool registerPattern(const bkstring & name,
- const bkstring & pattern,
- const unsigned long mode = 0);
-
- /**
- Clears the pattern registry
- */
- static void unregisterPatterns();
- /**
- Don't use
- */
- static void clearPatternCache();
-
- /**
- Searches through a string for the <code>n<sup>th</sup></code> match of the
- given pattern in the string. Match indeces start at zero, not one.
- A typical invocation looks like this:
- <p>
- <code>
- std::pair&lt;bkstring, int&gt; match = Pattern::findNthMatch("\\d{1,3}", "192.168.1.101:22", 1);<br>
- printf("%s %i\n", match.first.c_str(), match.second);<br>
- <br>
- Output: 168 4<br>
- <br>
-
- @param pattern The pattern for which to search
- @param str The string to search
- @param matchNum Which match to find
- @param mode Any special flags to use during the matching process
- @return A string and an integer. The string is the string matched. The
- integer is the starting location of the matched string in
- <code>str</code>. You can check for success/failure by making sure
- that the integer returned is greater than or equal to zero.
- */
- static std::pair<bkstring, int> findNthMatch (const bkstring & pattern,
- const bkstring & str,
- const int matchNum,
- const unsigned long mode = 0);
- public:
- /**
- Deletes all NFA nodes allocated during compilation
- */
- ~Pattern();
-
- bkstring replace (const bkstring & str,
- const bkstring & replacementText);
- std::vector<bkstring> split (const bkstring & str, const bool keepEmptys = 0,
- const unsigned long limit = 0);
- std::vector<bkstring> findAll (const bkstring & str);
- bool matches (const bkstring & str);
- /**
- Returns the flags used during compilation of this pattern
- @return The flags used during compilation of this pattern
- */
- unsigned long getFlags () const;
- /**
- Returns the regular expression this pattern represents
- @return The regular expression this pattern represents
- */
- bkstring getPattern () const;
- /**
- Creates a matcher object using the specified string and this pattern.
- @param str The string to match against
- @return A new matcher using object using this pattern and the specified
- string
- */
- Matcher * createMatcher (const bkstring & str);
-};
-
-class NFANode
-{
- friend class Matcher;
- public:
- NFANode * next;
- NFANode();
- virtual ~NFANode();
- virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const = 0;
- inline virtual bool isGroupHeadNode() const { return false; }
- inline virtual bool isStartOfInputNode() const { return false; }
-};
-class NFACharNode : public NFANode
-{
- protected:
- char ch;
- public:
- NFACharNode(const char c);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFACICharNode : public NFANode
-{
- protected:
- char ch;
- public:
- NFACICharNode(const char c);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAStartNode : public NFANode
-{
- public:
- NFAStartNode();
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAEndNode : public NFANode
-{
- public:
- NFAEndNode();
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAQuantifierNode : public NFANode
-{
- public:
- int min, max;
- NFANode * inner;
- virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
- NFAQuantifierNode(Pattern * pat, NFANode * internal,
- const int minMatch = Pattern::MIN_QMATCH,
- const int maxMatch = Pattern::MAX_QMATCH);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAGreedyQuantifierNode : public NFAQuantifierNode
-{
- public:
- NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal,
- const int minMatch = Pattern::MIN_QMATCH,
- const int maxMatch = Pattern::MAX_QMATCH);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
- virtual int matchInternal(const bkstring & str, Matcher * matcher, const int curInd, const int soFar) const;
-};
-class NFALazyQuantifierNode : public NFAQuantifierNode
-{
- public:
- NFALazyQuantifierNode(Pattern * pat, NFANode * internal,
- const int minMatch = Pattern::MIN_QMATCH,
- const int maxMatch = Pattern::MAX_QMATCH);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAPossessiveQuantifierNode : public NFAQuantifierNode
-{
- public:
- NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal,
- const int minMatch = Pattern::MIN_QMATCH,
- const int maxMatch = Pattern::MAX_QMATCH);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAAcceptNode : public NFANode
-{
- public:
- NFAAcceptNode();
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAClassNode : public NFANode
-{
- public:
- bool inv;
- std::map<char, bool> vals;
- NFAClassNode(const bool invert = 0);
- NFAClassNode(const bkstring & clazz, const bool invert);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFACIClassNode : public NFANode
-{
- public:
- bool inv;
- std::map<char, bool> vals;
- NFACIClassNode(const bool invert = 0);
- NFACIClassNode(const bkstring & clazz, const bool invert);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFASubStartNode : public NFANode
-{
- public:
- NFASubStartNode();
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAOrNode : public NFANode
-{
- public:
- NFANode * one;
- NFANode * two;
- NFAOrNode(NFANode * first, NFANode * second);
- virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAQuoteNode : public NFANode
-{
- public:
- bkstring qStr;
- NFAQuoteNode(const bkstring & quoted);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFACIQuoteNode : public NFANode
-{
- public:
- bkstring qStr;
- NFACIQuoteNode(const bkstring & quoted);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFALookAheadNode : public NFANode
-{
- public:
- bool pos;
- NFANode * inner;
- NFALookAheadNode(NFANode * internal, const bool positive);
- virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFALookBehindNode : public NFANode
-{
- public:
- bool pos;
- bkstring mStr;
- NFALookBehindNode(const bkstring & str, const bool positive);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAStartOfLineNode : public NFANode
-{
- public:
- NFAStartOfLineNode();
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAEndOfLineNode : public NFANode
-{
- public:
- NFAEndOfLineNode();
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAReferenceNode : public NFANode
-{
- public:
- int gi;
- NFAReferenceNode(const int groupIndex);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAStartOfInputNode : public NFANode
-{
- public:
- NFAStartOfInputNode();
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
- inline virtual bool isStartOfInputNode() const { return true; }
-};
-class NFAEndOfInputNode : public NFANode
-{
- public:
- bool term;
- NFAEndOfInputNode(const bool lookForTerm);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAWordBoundaryNode : public NFANode
-{
- public:
- bool pos;
- NFAWordBoundaryNode(const bool positive);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAEndOfMatchNode : public NFANode
-{
- public:
- NFAEndOfMatchNode();
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAGroupHeadNode : public NFANode
-{
- public:
- int gi;
- NFAGroupHeadNode(const int groupIndex);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
- inline virtual bool isGroupHeadNode() const { return true; }
-};
-class NFAGroupTailNode : public NFANode
-{
- public:
- int gi;
- NFAGroupTailNode(const int groupIndex);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAGroupLoopPrologueNode : public NFANode
-{
- public:
- int gi;
- NFAGroupLoopPrologueNode(const int groupIndex);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-class NFAGroupLoopNode : public NFANode
-{
- public:
- int gi, min, max, type;
- NFANode * inner;
- NFAGroupLoopNode(NFANode * internal, const int minMatch,
- const int maxMatch, const int groupIndex, const int matchType);
- virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
- virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
- int matchGreedy(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
- int matchLazy(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
- int matchPossessive(const bkstring & str, Matcher * matcher, const int curInd = 0) const;
-};
-
-#endif
-
diff --git a/plugins/SmileyAdd/src/regexp/WCMatcher.cpp b/plugins/SmileyAdd/src/regexp/WCMatcher.cpp
index bed5a1944b..d74fc9f9b5 100644
--- a/plugins/SmileyAdd/src/regexp/WCMatcher.cpp
+++ b/plugins/SmileyAdd/src/regexp/WCMatcher.cpp
@@ -1,5 +1,4 @@
-#include "WCMatcher.h"
-#include "WCPattern.h"
+#include "..\general.h"
const int WCMatcher::MATCH_ENTIRE_STRING = 0x01;
diff --git a/plugins/SmileyAdd/src/regexp/WCMatcher.h b/plugins/SmileyAdd/src/regexp/WCMatcher.h
index 23cc49e41f..45610690ba 100644
--- a/plugins/SmileyAdd/src/regexp/WCMatcher.h
+++ b/plugins/SmileyAdd/src/regexp/WCMatcher.h
@@ -1,10 +1,6 @@
#ifndef __WCMATCHER_H__
#define __WCMATCHER_H__
-#include "bkstring.h"
-#include <vector>
-#include <WCPattern.h>
-
/**
A matcher is a non thread-safe object used to scan strings using a given
{@link WCPattern WCPattern} object. Using a <code>WCMatcher</code> is the preferred
diff --git a/plugins/SmileyAdd/src/regexp/WCPattern.cpp b/plugins/SmileyAdd/src/regexp/WCPattern.cpp
index 25f379f5e4..26a4054dfe 100644
--- a/plugins/SmileyAdd/src/regexp/WCPattern.cpp
+++ b/plugins/SmileyAdd/src/regexp/WCPattern.cpp
@@ -23,18 +23,7 @@
@version 1.07.00
*/
-#ifdef _WIN32
- #pragma warning(push)
- #pragma warning(disable:4996)
-#endif
-
-#include <WCPattern.h>
-#include <WCMatcher.h>
-#include <wchar.h>
-#include <algorithm>
-#ifndef _WIN32
- #include <wctype.h>
-#endif
+#include "..\general.h"
std::map<bkstring, WCPattern *> WCPattern::compiledWCPatterns;
std::map<bkstring, std::pair<bkstring, unsigned long> > WCPattern::registeredWCPatterns;
@@ -1740,8 +1729,4 @@ int NFAGroupLoopUNode::matchPossessive(const bkstring & str, WCMatcher * matcher
if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd);
}
return ret;
-}
-
-#ifdef _WIN32
- #pragma warning(pop)
-#endif
+} \ No newline at end of file
diff --git a/plugins/SmileyAdd/src/regexp/WCPattern.h b/plugins/SmileyAdd/src/regexp/WCPattern.h
index 3d52a7fd2e..143eff7eb7 100644
--- a/plugins/SmileyAdd/src/regexp/WCPattern.h
+++ b/plugins/SmileyAdd/src/regexp/WCPattern.h
@@ -1,15 +1,6 @@
#ifndef __WCPATTERN_H__
#define __WCPATTERN_H__
-#ifdef _WIN32
- #pragma warning(disable:4786)
-#endif
-
-#include "bkstring.h"
-
-#include <vector>
-#include <map>
-
class WCMatcher;
class NFAUNode;
class NFAQuantifierUNode;
diff --git a/plugins/SmileyAdd/src/regexp/test.cpp b/plugins/SmileyAdd/src/regexp/test.cpp
deleted file mode 100644
index bb41566f96..0000000000
--- a/plugins/SmileyAdd/src/regexp/test.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-#include <regexp/Matcher.h>
-#include <regexp/Pattern.h>
-#include <regexp/UnicodeMatcher.h>
-#include <regexp/UnicodePattern.h>
-
-int main()
-{
- {
- Pattern * p = Pattern::compile("^([^:]*)://([^/:]*)((?::[0-9]+)?)/?(([^?]*)((?:\\?.*)?))$");
- Matcher * m0 = p->createMatcher("http://www.example.com:80/test.php?a=1&a=1&a=1");
-
- if (m0->matches())
- {
- std::vector<std::string> groups = m0->getGroups(true);
- for (int i = 0; i < (int)groups.size(); ++i)
- {
- printf("m->group(%d): %s\n", i, groups[i].c_str());
- }
- }
- }
- {
- std::wstring pat = L"^([^:]*)://([^/:]*)((?::[0-9]+)?)/?(([^?]*)((?:\\?.*)?))$";
- std::wstring mat = L"http://www.example.com:80/test.php?a=1&a=1&a=1";
- UnicodePattern * p = UnicodePattern::compile(pat);
- UnicodeMatcher * m0 = p->createUnicodeMatcher(mat);
-
- if (m0->matches())
- {
- std::vector<std::wstring> groups = m0->getGroups(true);
- for (int i = 0; i < (int)groups.size(); ++i)
- {
- wprintf(L"m->group(%d): %s\n", i, groups[i].c_str());
- }
- }
- }
-
- return 0;
-}