summaryrefslogtreecommitdiff
path: root/plugins/SmileyAdd/src/regexp/WCPattern.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/SmileyAdd/src/regexp/WCPattern.cpp')
-rw-r--r--plugins/SmileyAdd/src/regexp/WCPattern.cpp2744
1 files changed, 1344 insertions, 1400 deletions
diff --git a/plugins/SmileyAdd/src/regexp/WCPattern.cpp b/plugins/SmileyAdd/src/regexp/WCPattern.cpp
index e44c835b39..c6d3d500f9 100644
--- a/plugins/SmileyAdd/src/regexp/WCPattern.cpp
+++ b/plugins/SmileyAdd/src/regexp/WCPattern.cpp
@@ -13,7 +13,7 @@
I tend to delete email if I don't recognize the name and the subject is
something like "I Need Your Help" or "Got A Second" or "I Found It".
"
- */
+ */
/*
Detailed documentation is provided in this class' header file
@@ -21,1173 +21,1110 @@
@author Jeffery Stuart
@since November 2004
@version 1.07.00
-*/
+ */
#include "..\general.h"
-std::map<bkstring, WCPattern *> WCPattern::compiledWCPatterns;
-std::map<bkstring, std::pair<bkstring, unsigned long> > WCPattern::registeredWCPatterns;
+std::map<CMString, WCPattern *> WCPattern::compiledWCPatterns;
+std::map<CMString, std::pair<CMString, unsigned long> > WCPattern::registeredWCPatterns;
const int WCPattern::MIN_QMATCH = 0x00000000;
const int WCPattern::MAX_QMATCH = 0x7FFFFFFF;
-const unsigned long WCPattern::CASE_INSENSITIVE = 0x01;
-const unsigned long WCPattern::LITERAL = 0x02;
-const unsigned long WCPattern::DOT_MATCHES_ALL = 0x04;
-const unsigned long WCPattern::MULTILINE_MATCHING = 0x08;
-const unsigned long WCPattern::UNIX_LINE_MODE = 0x10;
+const unsigned long WCPattern::CASE_INSENSITIVE = 0x01;
+const unsigned long WCPattern::LITERAL = 0x02;
+const unsigned long WCPattern::DOT_MATCHES_ALL = 0x04;
+const unsigned long WCPattern::MULTILINE_MATCHING = 0x08;
+const unsigned long WCPattern::UNIX_LINE_MODE = 0x10;
#define to_lower(a) (wchar_t)(UINT_PTR)CharLowerW((LPWSTR)(unsigned)a)
#define is_alpha IsCharAlphaW
#if defined(_WIN32)
- #define str_icmp lstrcmpiW
+#define str_icmp lstrcmpiW
#elif defined(__CYGWIN__) || defined(__APPLE__)
- #include <wctype.h>
- static inline int str_icmp(const wchar_t * a, const wchar_t * b)
- {
- while (*a && *b)
- {
- const int t = (int)towlower(*a) - (int)tolower(*b);
- if (t) return t;
- ++a; ++b;
- }
- if (*a)
- {
- if (*b) return (int)towlower(*a) - (int)tolower(*b);
- return 1;
- }
- else if (*b) return 1;
- return 0;
- }
+#include <wctype.h>
+static inline int str_icmp(const wchar_t * a, const wchar_t * b)
+{
+ while (*a && *b)
+ {
+ const int t = (int)towlower(*a) - (int)tolower(*b);
+ if (t) return t;
+ ++a; ++b;
+ }
+ if (*a)
+ {
+ if (*b) return (int)towlower(*a) - (int)tolower(*b);
+ return 1;
+ }
+ else if (*b) return 1;
+ return 0;
+}
#else
- #define str_icmp wcscasecmp
+#define str_icmp wcscasecmp
#endif
-WCPattern::WCPattern(const bkstring & rhs)
+WCPattern::WCPattern(const CMString & rhs)
{
- matcher = NULL;
- pattern = rhs;
- curInd = 0;
- groupCount = 0;
- nonCapGroupCount = 0;
- error = 0;
- head = NULL;
+ matcher = NULL;
+ pattern = rhs;
+ curInd = 0;
+ groupCount = 0;
+ nonCapGroupCount = 0;
+ error = 0;
+ head = NULL;
}
// convenience function in case we want to add any extra debugging output
void WCPattern::raiseError()
{
-/* switch (pattern[curInd - 1])
- {
- case '*':
- case ')':
- case '+':
- case '?':
- case ']':
- case '}':
- fwprintf(stderr, L"%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
- fwprintf(stderr, L"Syntax Error near here. Possible unescaped meta character.\n");
- break;
- default:
- fwprintf(stderr, L"%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
- fwprintf(stderr, L"Syntax Error near here. \n");
- break;
- }*/
- error = 1;
+ /* switch (pattern[curInd - 1])
+ {
+ case '*':
+ case ')':
+ case '+':
+ case '?':
+ case ']':
+ case '}':
+ fwprintf(stderr, L"%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
+ fwprintf(stderr, L"Syntax Error near here. Possible unescaped meta character.\n");
+ break;
+ default:
+ fwprintf(stderr, L"%s\n%*c^\n", pattern.c_str(), curInd - 1, ' ');
+ fwprintf(stderr, L"Syntax Error near here. \n");
+ break;
+ }*/
+ error = 1;
}
NFAUNode * WCPattern::registerNode(NFAUNode * node)
{
- nodes[node] = 1;
- return node;
+ nodes[node] = 1;
+ return node;
}
-bkstring WCPattern::classUnion (bkstring s1, bkstring s2) const
+CMString WCPattern::classUnion(CMString s1, CMString s2) const
{
- wchar_t * out = new wchar_t[66000];
- std::sort(s1.begin(), s1.end());
- std::sort(s2.begin(), s2.end());
- wchar_t* p = std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), out); *p = 0;
- bkstring ret = out;
- delete [] out;
- return ret;
+ wchar_t * out = new wchar_t[66000];
+ std::sort((LPTSTR)s1.GetString(), (LPTSTR)s1.GetTail());
+ std::sort((LPTSTR)s2.GetString(), (LPTSTR)s2.GetTail());
+ wchar_t* p = std::set_union(s1.GetString(), s1.GetTail(), s2.GetString(), s2.GetTail(), out); *p = 0;
+ CMString ret = out;
+ delete[] out;
+ return ret;
}
-bkstring WCPattern::classIntersect (bkstring s1, bkstring s2) const
+
+CMString WCPattern::classIntersect(CMString s1, CMString s2) const
{
- wchar_t * out = new wchar_t[66000];
- std::sort(s1.begin(), s1.end());
- std::sort(s2.begin(), s2.end());
- *std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0;
- bkstring ret = out;
- delete [] out;
- return ret;
+ wchar_t * out = new wchar_t[66000];
+ std::sort((LPTSTR)s1.GetString(), (LPTSTR)s1.GetTail());
+ std::sort((LPTSTR)s2.GetString(), (LPTSTR)s2.GetTail());
+ *std::set_intersection(s1.GetString(), s1.GetTail(), s2.GetString(), s2.GetTail(), out) = 0;
+ CMString ret = out;
+ delete[] out;
+ return ret;
}
-bkstring WCPattern::classNegate (bkstring s1) const
+
+CMString WCPattern::classNegate(CMString s1) const
{
- wchar_t * out = new wchar_t[66000];
- int i, ind = 0;
- std::map<wchar_t, bool> m;
+ wchar_t * out = new wchar_t[66000];
+ int i, ind = 0;
+ std::map<wchar_t, bool> m;
- for (i = 0; i < (int)s1.size(); ++i) m[s1[i]] = 1;
- for (i = 0xFF; i >= 0; --i) if (m.find((wchar_t)i) == m.end()) out[ind++] = (wchar_t)i;
- out[ind] = 0;
- bkstring ret(out, ind);
- delete [] out;
- return ret;
+ for (i = 0; i < s1.GetLength(); ++i) m[s1[i]] = 1;
+ for (i = 0xFF; i >= 0; --i) if (m.find((wchar_t)i) == m.end()) out[ind++] = (wchar_t)i;
+ out[ind] = 0;
+ CMString ret(out, ind);
+ delete[] out;
+ return ret;
}
-bkstring WCPattern::classCreateRange(wchar_t low, wchar_t hi) const
+
+CMString WCPattern::classCreateRange(wchar_t low, wchar_t hi) const
{
- wchar_t out[300];
- int ind = 0;
- while (low != hi) out[ind++] = low++;
- out[ind++] = low;
- return bkstring(out, ind);
+ wchar_t out[300];
+ int ind = 0;
+ while (low != hi) out[ind++] = low++;
+ out[ind++] = low;
+ return CMString(out, ind);
}
int WCPattern::getInt(int start, int end)
{
- int ret = 0;
- for (; start <= end; ++start) ret = ret * 10 + (pattern[start] - (wchar_t)'0');
- return ret;
+ int ret = 0;
+ for (; start <= end; ++start) ret = ret * 10 + (pattern[start] - '0');
+ return ret;
}
+
bool WCPattern::quantifyCurly(int & sNum, int & eNum)
{
- bool good = 1;
- int i, ci = curInd + 1;
- int commaInd = ci, endInd = ci, len = (int)pattern.size();
- sNum = eNum = 0;
-
- while (endInd < len && pattern[endInd ] != (wchar_t)'}') ++endInd;
- while (commaInd < endInd && pattern[commaInd] != (wchar_t)',') ++commaInd;
- if (endInd >= len) { raiseError(); return 0; }
- for (i = ci; good && i < endInd; ++i) if (i != commaInd && !isdigit(pattern[i])) good = 0;
- if (!good && commaInd < endInd) { raiseError(); return 0; }
- if (!good) return 0;
- /* so now everything in here is either a comma (and there is at most one comma) or a digit */
- if (commaInd == ci) // {,*}
- {
- if (endInd == commaInd + 1) { sNum = MIN_QMATCH; eNum = MAX_QMATCH; } // {,} = *
- else { sNum = MIN_QMATCH; eNum = getInt(commaInd + 1, endInd - 1); } // {,+}
- }
- else if (commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH; } // {+,}
- else if (commaInd == endInd) { sNum = getInt(ci, endInd - 1); eNum = sNum; } // {+}
- else { sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1); } // {+,+}
- curInd = endInd + 1;
- return 1;
+ bool good = 1;
+ int i, ci = curInd + 1;
+ int commaInd = ci, endInd = ci, len = pattern.GetLength();
+ sNum = eNum = 0;
+
+ while (endInd < len && pattern[endInd] != '}') ++endInd;
+ while (commaInd < endInd && pattern[commaInd] != ',') ++commaInd;
+ if (endInd >= len) { raiseError(); return 0; }
+ for (i = ci; good && i < endInd; ++i) if (i != commaInd && !isdigit(pattern[i])) good = 0;
+ if (!good && commaInd < endInd) { raiseError(); return 0; }
+ if (!good) return 0;
+ /* so now everything in here is either a comma (and there is at most one comma) or a digit */
+ if (commaInd == ci) // {,*}
+ {
+ if (endInd == commaInd + 1) { sNum = MIN_QMATCH; eNum = MAX_QMATCH; } // {,} = *
+ else { sNum = MIN_QMATCH; eNum = getInt(commaInd + 1, endInd - 1); } // {,+}
+ }
+ else if (commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH; } // {+,}
+ else if (commaInd == endInd) { sNum = getInt(ci, endInd - 1); eNum = sNum; } // {+}
+ else { sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1); } // {+,+}
+ curInd = endInd + 1;
+ return 1;
}
+
NFAUNode * WCPattern::quantifyGroup(NFAUNode * start, NFAUNode * stop, const int gn)
{
- NFAUNode * newNode = NULL;
- int type = 0;
-
- if (curInd < (int)pattern.size())
- {
- wchar_t ch = (curInd + 1 >= (int)pattern.size()) ? USHRT_MAX : pattern[curInd + 1];
- switch (pattern[curInd])
- {
- case (wchar_t)'*':
- ++curInd;
- switch (ch)
- {
- case (wchar_t)'?': ++curInd; type = 1; break;
- case (wchar_t)'+': ++curInd; type = 2; break;
- }
- newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
- newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, MAX_QMATCH, gn, type));
- stop->next = newNode->next;
- return newNode;
- case (wchar_t)'?':
- ++curInd;
- switch (ch)
- {
- case (wchar_t)'?': ++curInd; type = 1; break;
- case (wchar_t)'+': ++curInd; type = 2; break;
- }
- newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
- newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, 1, gn, type));
- stop->next = newNode->next;
- return newNode;
- case (wchar_t)'+':
- ++curInd;
- switch (ch)
- {
- case (wchar_t)'?': ++curInd; type = 1; break;
- case (wchar_t)'+': ++curInd; type = 2; break;
- }
- newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
- newNode->next = registerNode(new NFAGroupLoopUNode(start, 1, MAX_QMATCH, gn, type));
- stop->next = newNode->next;
- return newNode;
- case (wchar_t)'{':
- {
- int s, e;
- if (quantifyCurly(s, e))
- {
- ch = (curInd < (int)pattern.size()) ? pattern[curInd] : USHRT_MAX;
- switch (ch)
- {
- case (wchar_t)'?': ++curInd; type = 1; break;
- case (wchar_t)'+': ++curInd; type = 2; break;
- }
- newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
- newNode->next = registerNode(new NFAGroupLoopUNode(start, s, e, gn, type));
- stop->next = newNode->next;
- return newNode;
- }
- }
- default:
- break;
- }
- }
- return NULL;
+ NFAUNode * newNode = NULL;
+ int type = 0;
+
+ if (curInd < pattern.GetLength()) {
+ wchar_t ch = (curInd + 1 >= pattern.GetLength()) ? USHRT_MAX : pattern[curInd + 1];
+ switch (pattern[curInd]) {
+ case '*':
+ ++curInd;
+ switch (ch) {
+ case '?': ++curInd; type = 1; break;
+ case '+': ++curInd; type = 2; break;
+ }
+ newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
+ newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, MAX_QMATCH, gn, type));
+ stop->next = newNode->next;
+ return newNode;
+ case '?':
+ ++curInd;
+ switch (ch) {
+ case '?': ++curInd; type = 1; break;
+ case '+': ++curInd; type = 2; break;
+ }
+ newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
+ newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, 1, gn, type));
+ stop->next = newNode->next;
+ return newNode;
+ case '+':
+ ++curInd;
+ switch (ch) {
+ case '?': ++curInd; type = 1; break;
+ case '+': ++curInd; type = 2; break;
+ }
+ newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
+ newNode->next = registerNode(new NFAGroupLoopUNode(start, 1, MAX_QMATCH, gn, type));
+ stop->next = newNode->next;
+ return newNode;
+ case '{':
+ {
+ int s, e;
+ if (quantifyCurly(s, e)) {
+ ch = (curInd < pattern.GetLength()) ? pattern[curInd] : USHRT_MAX;
+ switch (ch) {
+ case '?': ++curInd; type = 1; break;
+ case '+': ++curInd; type = 2; break;
+ }
+ newNode = registerNode(new NFAGroupLoopPrologueUNode(gn));
+ newNode->next = registerNode(new NFAGroupLoopUNode(start, s, e, gn, type));
+ stop->next = newNode->next;
+ return newNode;
+ }
+ }
+ }
+ }
+ return NULL;
}
NFAUNode * WCPattern::quantify(NFAUNode * newNode)
{
- if (curInd < (int)pattern.size())
- {
- wchar_t ch = (curInd + 1 >= (int)pattern.size()) ? USHRT_MAX : pattern[curInd + 1];
- switch (pattern[curInd])
- {
- case (wchar_t)'*':
- ++curInd;
- switch (ch)
- {
- case (wchar_t)'?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode (this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
- case (wchar_t)'+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
- default: newNode = registerNode(new NFAGreedyQuantifierUNode (this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
- }
- break;
- case (wchar_t)'?':
- ++curInd;
- switch (ch)
- {
- case (wchar_t)'?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode (this, newNode, MIN_QMATCH, 1)); break;
- case (wchar_t)'+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break;
- default: newNode = registerNode(new NFAGreedyQuantifierUNode (this, newNode, MIN_QMATCH, 1)); break;
- }
- break;
- case (wchar_t)'+':
- ++curInd;
- switch (ch)
- {
- case (wchar_t)'?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode (this, newNode, 1, MAX_QMATCH)); break;
- case (wchar_t)'+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break;
- default: newNode = registerNode(new NFAGreedyQuantifierUNode (this, newNode, 1, MAX_QMATCH)); break;
- }
- break;
- case (wchar_t)'{':
- {
- int s, e;
- if (quantifyCurly(s, e))
- {
- ch = (curInd < (int)pattern.size()) ? pattern[curInd] : USHRT_MAX;
- switch (ch)
- {
- case (wchar_t)'?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode (this, newNode, s, e)); break;
- case (wchar_t)'+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, s, e)); break;
- default: newNode = registerNode(new NFAGreedyQuantifierUNode (this, newNode, s, e)); break;
- }
- }
- }
- break;
- default:
- break;
- }
- }
- return newNode;
-}
-bkstring WCPattern::parseClass()
-{
- bkstring t, ret = L"";
- wchar_t ch, c1, c2;
- bool inv = 0, neg = 0, quo = 0;
-
- if (curInd < (int)pattern.size() && pattern[curInd] == (wchar_t)'^')
- {
- ++curInd;
- neg = 1;
- }
- while (curInd < (int)pattern.size() && pattern[curInd] != (wchar_t)']')
- {
- ch = pattern[curInd++];
- if (ch == (wchar_t)'[')
- {
- t = parseClass();
- ret = classUnion(ret, t);
- }
- /*else if (ch == (wchar_t)'-')
- {
- raiseError();
- curInd = pattern.size();
- }*/
- else if (ch == (wchar_t)'&' && curInd < (int)pattern.size() && pattern[curInd] == (wchar_t)'&')
- {
- if (pattern[++curInd] != (wchar_t)'[')
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else
- {
- ++curInd;
- t = parseClass();
- ret = classIntersect(ret, t);
- }
- }
- else if (ch == (wchar_t)'\\')
- {
- t = parseEscape(inv, quo);
- if (quo)
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else if (inv || t.size() > 1) // cant be part of a range (a-z)
- {
- if (inv) t = classNegate(t);
- ret = classUnion(ret, t);
- }
- else if (curInd < (int)pattern.size() && pattern[curInd] == (wchar_t)'-') // part of a range (a-z)
- {
- c1 = t[0];
- ++curInd;
- if (curInd >= (int)pattern.size()) raiseError();
- else
- {
- c2 = pattern[curInd++];
- if (c2 == (wchar_t)'\\')
- {
- t = parseEscape(inv, quo);
- if (quo)
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else if (inv || t.size() > 1) raiseError();
- else ret = classUnion(ret, classCreateRange(c1, c2));
- }
- else if (c2 == (wchar_t)'[' || c2 == (wchar_t)']' || c2 == (wchar_t)'-' || c2 == (wchar_t)'&')
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else ret = classUnion(ret, classCreateRange(c1, c2));
- }
- }
- else
- {
- ret = classUnion(ret, t);
- }
- }
- else if (curInd < (int)pattern.size() && pattern[curInd] == (wchar_t)'-')
- {
- c1 = ch;
- ++curInd;
- if (curInd >= (int)pattern.size()) raiseError();
- else
- {
- c2 = pattern[curInd++];
- if (c2 == (wchar_t)'\\')
- {
- t = parseEscape(inv, quo);
- if (quo)
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else if (inv || t.size() > 1) raiseError();
- else ret = classUnion(ret, classCreateRange(c1, c2));
- }
- else if (c2 == (wchar_t)'[' || c2 == (wchar_t)']' || c2 == (wchar_t)'-' || c2 == (wchar_t)'&')
- {
- raiseError();
- curInd = (int)pattern.size();
- }
- else
- {
- ret = classUnion(ret, classCreateRange(c1, c2));
- }
- }
- }
- else
- {
- ret += L" ";
- ret[ret.size() - 1] = ch;
- }
- }
- if (curInd >= (int)pattern.size() || pattern[curInd] != (wchar_t)']')
- {
- raiseError();
- ret = L"";
- }
- else
- {
- ++curInd;
- if (neg) ret = classNegate(ret);
- }
- return ret;
-}
-bkstring WCPattern::parsePosix()
-{
- bkstring s7 = pattern.substr(curInd, 7);
- if (s7 == L"{Lower}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyz"; }
- if (s7 == L"{Upper}") { curInd += 7; return L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; }
- if (s7 == L"{Alpha}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; }
- if (s7 == L"{Digit}") { curInd += 7; return L"0123456789"; }
- if (s7 == L"{Alnum}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; }
- if (s7 == L"{Punct}") { curInd += 7; return L"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
- if (s7 == L"{Graph}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
- if (s7 == L"{Print}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
- if (s7 == L"{Blank}") { curInd += 7; return L" \t"; }
- if (s7 == L"{Space}") { curInd += 7; return L" \t\n\x0B\f\r"; }
- if (s7 == L"{Cntrl}")
- {
- bkstring::value_type i;
- bkstring s = L" ";
-
- for (i = 0; i < 5; ++i) s += s;
- s += L" ";
- for (i = 0; i <= 0x1F; ++i) s[i] = i;
- s[0x20] = 0x7F;
- curInd += 7;
- return s;
- }
- if (s7 == L"{ASCII}")
- {
- bkstring s(0x80, (wchar_t)' ');
- for (bkstring::value_type i = 0; i <= 0x7f; ++i) s[i] = i;
- curInd += 7;
- return s;
- }
- if (pattern.substr(curInd, 8) == L"{XDigit}") { curInd += 8; return L"abcdefABCDEF0123456789"; }
- raiseError();
- return L"";
+ if (curInd < pattern.GetLength()) {
+ wchar_t ch = (curInd + 1 >= pattern.GetLength()) ? USHRT_MAX : pattern[curInd + 1];
+ switch (pattern[curInd]) {
+ case '*':
+ ++curInd;
+ switch (ch) {
+ case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
+ case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
+ default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break;
+ }
+ break;
+ case '?':
+ ++curInd;
+ switch (ch) {
+ case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break;
+ case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break;
+ default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break;
+ }
+ break;
+ case '+':
+ ++curInd;
+ switch (ch) {
+ case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break;
+ case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break;
+ default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break;
+ }
+ break;
+ case '{':
+ int s, e;
+ if (quantifyCurly(s, e)) {
+ ch = (curInd < pattern.GetLength()) ? pattern[curInd] : USHRT_MAX;
+ switch (ch) {
+ case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, s, e)); break;
+ case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, s, e)); break;
+ default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, s, e)); break;
+ }
+ }
+ break;
+ }
+ }
+ return newNode;
+}
+CMString WCPattern::parseClass()
+{
+ CMString t, ret = L"";
+ wchar_t ch, c1, c2;
+ bool inv = 0, neg = 0, quo = 0;
+
+ if (curInd < pattern.GetLength() && pattern[curInd] == '^')
+ {
+ ++curInd;
+ neg = 1;
+ }
+ while (curInd < pattern.GetLength() && pattern[curInd] != ']')
+ {
+ ch = pattern[curInd++];
+ if (ch == '[')
+ {
+ t = parseClass();
+ ret = classUnion(ret, t);
+ }
+ /*else if (ch == '-')
+ {
+ raiseError();
+ curInd = pattern.GetLength();
+ }*/
+ else if (ch == '&' && curInd < pattern.GetLength() && pattern[curInd] == '&') {
+ if (pattern[++curInd] != '[') {
+ raiseError();
+ curInd = pattern.GetLength();
+ }
+ else {
+ ++curInd;
+ t = parseClass();
+ ret = classIntersect(ret, t);
+ }
+ }
+ else if (ch == '\\')
+ {
+ t = parseEscape(inv, quo);
+ if (quo) {
+ raiseError();
+ curInd = pattern.GetLength();
+ }
+ else if (inv || t.GetLength() > 1) { // cant be part of a range (a-z)
+ if (inv) t = classNegate(t);
+ ret = classUnion(ret, t);
+ }
+ else if (curInd < pattern.GetLength() && pattern[curInd] == '-') { // part of a range (a-z)
+ c1 = t[0];
+ ++curInd;
+ if (curInd >= pattern.GetLength()) raiseError();
+ else {
+ c2 = pattern[curInd++];
+ if (c2 == '\\')
+ {
+ t = parseEscape(inv, quo);
+ if (quo)
+ {
+ raiseError();
+ curInd = pattern.GetLength();
+ }
+ else if (inv || t.GetLength() > 1) raiseError();
+ else ret = classUnion(ret, classCreateRange(c1, c2));
+ }
+ else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&')
+ {
+ raiseError();
+ curInd = pattern.GetLength();
+ }
+ else ret = classUnion(ret, classCreateRange(c1, c2));
+ }
+ }
+ else
+ {
+ ret = classUnion(ret, t);
+ }
+ }
+ else if (curInd < pattern.GetLength() && pattern[curInd] == '-')
+ {
+ c1 = ch;
+ ++curInd;
+ if (curInd >= pattern.GetLength()) raiseError();
+ else
+ {
+ c2 = pattern[curInd++];
+ if (c2 == '\\')
+ {
+ t = parseEscape(inv, quo);
+ if (quo)
+ {
+ raiseError();
+ curInd = pattern.GetLength();
+ }
+ else if (inv || t.GetLength() > 1) raiseError();
+ else ret = classUnion(ret, classCreateRange(c1, c2));
+ }
+ else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&')
+ {
+ raiseError();
+ curInd = pattern.GetLength();
+ }
+ else
+ {
+ ret = classUnion(ret, classCreateRange(c1, c2));
+ }
+ }
+ }
+ else ret.AppendChar(ch);
+ }
+ if (curInd >= pattern.GetLength() || pattern[curInd] != ']')
+ {
+ raiseError();
+ ret = L"";
+ }
+ else
+ {
+ ++curInd;
+ if (neg) ret = classNegate(ret);
+ }
+ return ret;
+}
+CMString WCPattern::parsePosix()
+{
+ CMString s7 = pattern.Mid(curInd, 7);
+ if (s7 == L"{Lower}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyz"; }
+ if (s7 == L"{Upper}") { curInd += 7; return L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; }
+ if (s7 == L"{Alpha}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; }
+ if (s7 == L"{Digit}") { curInd += 7; return L"0123456789"; }
+ if (s7 == L"{Alnum}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; }
+ if (s7 == L"{Punct}") { curInd += 7; return L"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
+ if (s7 == L"{Graph}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
+ if (s7 == L"{Print}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }
+ if (s7 == L"{Blank}") { curInd += 7; return L" \t"; }
+ if (s7 == L"{Space}") { curInd += 7; return L" \t\n\x0B\f\r"; }
+ if (s7 == L"{Cntrl}")
+ {
+ TCHAR i;
+ CMString s = L" ";
+
+ for (i = 0; i < 5; ++i) s += s;
+ s += L" ";
+ for (i = 0; i <= 0x1F; ++i)
+ s.SetAt(i, i);
+ s.SetAt(0x20, 0x7F);
+ curInd += 7;
+ return s;
+ }
+ if (s7 == L"{ASCII}")
+ {
+ CMString s(' ', 0x80);
+ for (int i = 0; i <= 0x7f; ++i) s.SetAt(i, i);
+ curInd += 7;
+ return s;
+ }
+ if (pattern.Mid(curInd, 8) == L"{XDigit}") { curInd += 8; return L"abcdefABCDEF0123456789"; }
+ raiseError();
+ return L"";
}
NFAUNode * WCPattern::parseBackref()
{
- #define is_dig(x) ((x) >= (wchar_t)'0' && (x) <= (wchar_t)'9')
- #define to_int(x) ((x) - (wchar_t)'0')
- int ci = curInd;
- int oldRef = 0, ref = 0;
-
- while (ci < (int)pattern.size() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount))
- {
- oldRef = ref;
- ref = ref * 10 + to_int(pattern[ci++]);
- }
- if (ci == (int)pattern.size())
- {
- oldRef = ref;
- ++ci;
- }
- if (oldRef < 0 || ci <= curInd)
- {
- raiseError();
- return registerNode(new NFAReferenceUNode(-1));
- }
- curInd = ci;
- return registerNode(new NFAReferenceUNode(ref));
-
- #undef is_dig
- #undef to_int
-}
-bkstring WCPattern::parseOctal()
-{
- #define islowoc(x) ((x) >= (wchar_t)'0' && (x) <= (wchar_t)'3')
- #define isoc(x) ((x) >= (wchar_t)'0' && (x) <= (wchar_t)'7')
- #define fromoc(x) ((x) - (wchar_t)'0')
- int ci = curInd;
- wchar_t ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : USHRT_MAX;
- wchar_t ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : USHRT_MAX;
- wchar_t ch3 = (ci + 2 < (int)pattern.size()) ? pattern[ci + 2] : USHRT_MAX;
- bkstring s = L" ";
-
- if (islowoc(ch1) && isoc(ch2))
- {
- curInd += 2;
- s[0] = fromoc(ch1) * 8 + fromoc(ch2);
- if (isoc(ch3))
- {
- ++curInd;
- s[0] = s[0] * 8 + fromoc(ch3);
- }
- }
- else if (isoc(ch1) && isoc(ch2))
- {
- curInd += 2;
- s[0] = fromoc(ch1) * 8 + fromoc(ch2);
- }
- else raiseError();
-
- return s;
- #undef islowoc
- #undef isoc
- #undef fromoc
-}
-bkstring WCPattern::parseHex()
-{
- #define to_low(x) (((x) >= (wchar_t)'A' && (x) <= (wchar_t)'Z') ? ((x) - (wchar_t)'A' + (wchar_t)'a') : (x))
- #define is_dig(x) ((x) >= (wchar_t)'0' && (x) <= (wchar_t)'9')
- #define is_hex(x) (is_dig(x) || (to_low(x) >= (wchar_t)'a' && to_low(x) <= (wchar_t)'f'))
- #define to_int(x) ((is_dig(x)) ? ((x) - (wchar_t)'0') : (to_low(x) - (wchar_t)'a' + 10))
-
- int ci = curInd;
- wchar_t ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : USHRT_MAX;
- wchar_t ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : USHRT_MAX;
- wchar_t ch3 = (ci + 2 < (int)pattern.size()) ? pattern[ci + 2] : USHRT_MAX;
- wchar_t ch4 = (ci + 3 < (int)pattern.size()) ? pattern[ci + 3] : USHRT_MAX;
- bkstring s = L" ";
-
- if (is_hex(ch1) && is_hex(ch2) && is_hex(ch3) && is_hex(ch4))
- {
- curInd += 2;
- s[0] = (to_int(ch1) << 12 & 0xF000) | (to_int(ch2) << 8 & 0x0F00) |
- (to_int(ch3) << 4 & 0x0F00) | (to_int(ch4) & 0x000F);
- }
- else if (is_hex(ch1) && is_hex(ch2))
- {
- curInd += 2;
- s[0] = (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F);
- }
-
- return s;
- #undef to_low
- #undef is_dig
- #undef is_hex
- #undef to_int
-}
-bkstring WCPattern::parseEscape(bool & inv, bool & quo)
-{
- wchar_t ch = pattern[curInd++];
- bkstring classes = L"";
-
- if (curInd > (int)pattern.size())
- {
- raiseError();
- return NULL;
- }
-
- quo = 0;
- inv = 0;
- switch (ch)
- {
- case (wchar_t)'p': classes = parsePosix(); break;
- case (wchar_t)'P': classes = L"!!"; classes += parsePosix(); break;
- case (wchar_t)'d': classes = L"0123456789"; break;
- case (wchar_t)'D': classes = L"!!0123456789"; break;
- case (wchar_t)'s': classes = L" \t\r\n\f"; break;
- case (wchar_t)'S': classes = L"!! \t\r\n\f"; break;
- case (wchar_t)'w': classes = L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break;
- case (wchar_t)'W': classes = L"!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break;
- case (wchar_t)'0': classes = parseOctal(); break;
- case (wchar_t)'x': classes = parseHex(); break;
-
- case (wchar_t)'Q': quo = 1; break;
- case (wchar_t)'t': classes = L"\t"; break;
- case (wchar_t)'r': classes = L"\r"; break;
- case (wchar_t)'n': classes = L"\n"; break;
- case (wchar_t)'f': classes = L"\f"; break;
- case (wchar_t)'a': classes = L"\a"; break;
- case (wchar_t)'e': classes = L"\r"; break;
- default: classes = L" "; classes[0] = ch; break;
- }
- if (classes.substr(0, 2) == L"!!")
- {
- classes = classes.substr(2);
- inv = 1;
- }
- return classes;
-}
-NFAUNode * WCPattern::parseRegisteredWCPattern(NFAUNode ** end)
-{
- int i, j;
- bkstring s;
- NFAUNode * ret = NULL;
- for (i = curInd; i < (int)pattern.size() && pattern[i] != (wchar_t)'}'; ++i) { }
- if (pattern[i] != (wchar_t)'}') { raiseError(); return NULL; }
- if (i == curInd + 1) { raiseError(); return NULL; } // {}
- if (
- !(
- (pattern[curInd] >= (wchar_t)'a' && pattern[curInd] <= (wchar_t)'z') ||
- (pattern[curInd] >= (wchar_t)'A' && pattern[curInd] <= (wchar_t)'Z') ||
- (pattern[curInd] == (wchar_t)'_')
- )
- )
- {
- raiseError();
- return NULL;
- }
- for (j = curInd; !error && j < i; ++j)
- {
- if (
- !(
- (pattern[j] >= (wchar_t)'a' && pattern[j] <= (wchar_t)'z') ||
- (pattern[j] >= (wchar_t)'A' && pattern[j] <= (wchar_t)'Z') ||
- (pattern[j] >= (wchar_t)'0' && pattern[j] <= (wchar_t)'9') ||
- (pattern[j] == (wchar_t)'_')))
- {
- raiseError();
- return NULL;
- }
- }
- s = pattern.substr(curInd, i - curInd);
- if (registeredWCPatterns.find(s) == registeredWCPatterns.end()) raiseError();
- else
- {
- unsigned long oflags = flags;
- bkstring op = pattern;
- int ci = i + 1;
-
- pattern = registeredWCPatterns[s].first;
- curInd = 0;
- flags = registeredWCPatterns[s].second;
-
- --groupCount;
- ret = parse(0, 0, end);
-
- pattern = op;
- curInd = ci;
- flags = oflags;
- }
- if (error) { *end = ret = NULL; }
- return ret;
+#define is_dig(x) ((x) >= '0' && (x) <= '9')
+#define to_int(x) ((x) - '0')
+ int ci = curInd;
+ int oldRef = 0, ref = 0;
+
+ while (ci < pattern.GetLength() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount))
+ {
+ oldRef = ref;
+ ref = ref * 10 + to_int(pattern[ci++]);
+ }
+ if (ci == pattern.GetLength())
+ {
+ oldRef = ref;
+ ++ci;
+ }
+ if (oldRef < 0 || ci <= curInd)
+ {
+ raiseError();
+ return registerNode(new NFAReferenceUNode(-1));
+ }
+ curInd = ci;
+ return registerNode(new NFAReferenceUNode(ref));
+
+#undef is_dig
+#undef to_int
+}
+CMString WCPattern::parseOctal()
+{
+#define islowoc(x) ((x) >= '0' && (x) <= '3')
+#define isoc(x) ((x) >= '0' && (x) <= '7')
+#define fromoc(x) ((x) - '0')
+ int ci = curInd;
+ wchar_t ch1 = (ci + 0 < pattern.GetLength()) ? pattern[ci + 0] : USHRT_MAX;
+ wchar_t ch2 = (ci + 1 < pattern.GetLength()) ? pattern[ci + 1] : USHRT_MAX;
+ wchar_t ch3 = (ci + 2 < pattern.GetLength()) ? pattern[ci + 2] : USHRT_MAX;
+ CMString s = L" ";
+
+ if (islowoc(ch1) && isoc(ch2)) {
+ curInd += 2;
+ s.SetAt(0, fromoc(ch1) * 8 + fromoc(ch2));
+ if (isoc(ch3)) {
+ ++curInd;
+ s.SetAt(0, s[0] * 8 + fromoc(ch3));
+ }
+ }
+ else if (isoc(ch1) && isoc(ch2)) {
+ curInd += 2;
+ s.SetAt(0, fromoc(ch1) * 8 + fromoc(ch2));
+ }
+ else raiseError();
+
+ return s;
+#undef islowoc
+#undef isoc
+#undef fromoc
+}
+CMString WCPattern::parseHex()
+{
+#define to_low(x) (((x) >= 'A' && (x) <= 'Z') ? ((x) - 'A' + 'a') : (x))
+#define is_dig(x) ((x) >= '0' && (x) <= '9')
+#define is_hex(x) (is_dig(x) || (to_low(x) >= 'a' && to_low(x) <= 'f'))
+#define to_int(x) ((is_dig(x)) ? ((x) - '0') : (to_low(x) - 'a' + 10))
+
+ int ci = curInd;
+ wchar_t ch1 = (ci + 0 < pattern.GetLength()) ? pattern[ci + 0] : USHRT_MAX;
+ wchar_t ch2 = (ci + 1 < pattern.GetLength()) ? pattern[ci + 1] : USHRT_MAX;
+ wchar_t ch3 = (ci + 2 < pattern.GetLength()) ? pattern[ci + 2] : USHRT_MAX;
+ wchar_t ch4 = (ci + 3 < pattern.GetLength()) ? pattern[ci + 3] : USHRT_MAX;
+ CMString s = L" ";
+
+ if (is_hex(ch1) && is_hex(ch2) && is_hex(ch3) && is_hex(ch4))
+ {
+ curInd += 2;
+ s.SetAt(0, (to_int(ch1) << 12 & 0xF000) | (to_int(ch2) << 8 & 0x0F00) |
+ (to_int(ch3) << 4 & 0x0F00) | (to_int(ch4) & 0x000F));
+ }
+ else if (is_hex(ch1) && is_hex(ch2))
+ {
+ curInd += 2;
+ s.SetAt(0, (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F));
+ }
+
+ return s;
+#undef to_low
+#undef is_dig
+#undef is_hex
+#undef to_int
+}
+CMString WCPattern::parseEscape(bool & inv, bool & quo)
+{
+ wchar_t ch = pattern[curInd++];
+ CMString classes;
+
+ if (curInd > pattern.GetLength()) {
+ raiseError();
+ return "";
+ }
+
+ quo = 0;
+ inv = 0;
+ switch (ch) {
+ case 'p': classes = parsePosix(); break;
+ case 'P': classes = L"!!"; classes += parsePosix(); break;
+ case 'd': classes = L"0123456789"; break;
+ case 'D': classes = L"!!0123456789"; break;
+ case 's': classes = L" \t\r\n\f"; break;
+ case 'S': classes = L"!! \t\r\n\f"; break;
+ case 'w': classes = L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break;
+ case 'W': classes = L"!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break;
+ case '0': classes = parseOctal(); break;
+ case 'x': classes = parseHex(); break;
+
+ case 'Q': quo = 1; break;
+ case 't': classes = L"\t"; break;
+ case 'r': classes = L"\r"; break;
+ case 'n': classes = L"\n"; break;
+ case 'f': classes = L"\f"; break;
+ case 'a': classes = L"\a"; break;
+ case 'e': classes = L"\r"; break;
+ default: classes.AppendChar(ch); break;
+ }
+
+ if (classes.Mid(0, 2) == L"!!") {
+ classes = classes.Mid(2);
+ inv = 1;
+ }
+ return classes;
+}
+NFAUNode * WCPattern::parseRegisteredWCPattern(NFAUNode **end)
+{
+ int i, j;
+ CMString s;
+ NFAUNode * ret = NULL;
+ for (i = curInd; i < pattern.GetLength() && pattern[i] != '}'; ++i) {}
+ if (pattern[i] != '}') { raiseError(); return NULL; }
+ if (i == curInd + 1) { raiseError(); return NULL; } // {}
+ if (
+ !(
+ (pattern[curInd] >= 'a' && pattern[curInd] <= 'z') ||
+ (pattern[curInd] >= 'A' && pattern[curInd] <= 'Z') ||
+ (pattern[curInd] == '_')
+ )
+ )
+ {
+ raiseError();
+ return NULL;
+ }
+ for (j = curInd; !error && j < i; ++j)
+ {
+ if (
+ !(
+ (pattern[j] >= 'a' && pattern[j] <= 'z') ||
+ (pattern[j] >= 'A' && pattern[j] <= 'Z') ||
+ (pattern[j] >= '0' && pattern[j] <= '9') ||
+ (pattern[j] == '_')))
+ {
+ raiseError();
+ return NULL;
+ }
+ }
+ s = pattern.Mid(curInd, i - curInd);
+ if (registeredWCPatterns.find(s) == registeredWCPatterns.end()) raiseError();
+ else
+ {
+ unsigned long oflags = flags;
+ CMString op = pattern;
+ int ci = i + 1;
+
+ pattern = registeredWCPatterns[s].first;
+ curInd = 0;
+ flags = registeredWCPatterns[s].second;
+
+ --groupCount;
+ ret = parse(0, 0, end);
+
+ pattern = op;
+ curInd = ci;
+ flags = oflags;
+ }
+ if (error) { *end = ret = NULL; }
+ return ret;
}
// look behind should interpret everything as a literal (except \\) since the
// pattern must have a concrete length
NFAUNode * WCPattern::parseBehind(const bool pos, NFAUNode ** end)
{
- bkstring t = L"";
- while (curInd < (int)pattern.size() && pattern[curInd] != (wchar_t)')')
- {
- wchar_t ch = pattern[curInd++];
- t += L" ";
- if (ch == (wchar_t)'\\')
- {
- if (curInd + 1 >= (int)pattern.size())
- {
- raiseError();
- return *end = registerNode(new NFACharUNode((wchar_t)' '));
- }
- ch = pattern[curInd++];
- }
- t[t.size() - 1] = ch;
- }
- if (curInd >= (int)pattern.size() || pattern[curInd] != (wchar_t)')') raiseError();
- else ++curInd;
- return *end = registerNode(new NFALookBehindUNode(t, pos));
+ CMString t = L"";
+ while (curInd < pattern.GetLength() && pattern[curInd] != ')')
+ {
+ wchar_t ch = pattern[curInd++];
+ if (ch == '\\')
+ {
+ if (curInd + 1 >= pattern.GetLength())
+ {
+ raiseError();
+ return *end = registerNode(new NFACharUNode(' '));
+ }
+ ch = pattern[curInd++];
+ }
+ t.AppendChar(ch);
+ }
+ if (curInd >= pattern.GetLength() || pattern[curInd] != ')') raiseError();
+ else ++curInd;
+ return *end = registerNode(new NFALookBehindUNode(t, pos));
}
NFAUNode * WCPattern::parseQuote()
{
- bool done = 0;
- bkstring s = L"";
-
- while (!done)
- {
- if (curInd >= (int)pattern.size())
- {
- raiseError();
- done = 1;
- }
- else if (pattern.substr(curInd, 2) == L"\\E")
- {
- curInd += 2;
- done = 1;
- }
- else if (pattern[curInd] == (wchar_t)'\\')
- {
- s += L" ";
- s[s.size() - 1] = pattern[++curInd];
- ++curInd;
- }
- else
- {
- s += L" ";
- s[s.size() - 1] = pattern[curInd++];
- }
- }
- if ((flags & WCPattern::CASE_INSENSITIVE) != 0) return registerNode(new NFACIQuoteUNode(s));
- return registerNode(new NFAQuoteUNode(s));
+ bool done = 0;
+ CMString s = L"";
+
+ while (!done)
+ {
+ if (curInd >= pattern.GetLength())
+ {
+ raiseError();
+ done = 1;
+ }
+ else if (pattern.Mid(curInd, 2) == L"\\E")
+ {
+ curInd += 2;
+ done = 1;
+ }
+ else if (pattern[curInd] == '\\')
+ {
+ s.AppendChar(pattern[++curInd]);
+ ++curInd;
+ }
+ else s.AppendChar(pattern[curInd++]);
+ }
+ if ((flags & WCPattern::CASE_INSENSITIVE) != 0) return registerNode(new NFACIQuoteUNode(s));
+ return registerNode(new NFAQuoteUNode(s));
}
NFAUNode * WCPattern::parse(const bool inParen, const bool inOr, NFAUNode ** end)
{
- NFAUNode * start, * cur, * next = NULL;
- bkstring t;
- int grc = groupCount++;
- bool inv, quo;
- bool ahead = 0, pos = 0, noncap = 0, indep = 0;
- unsigned long oldFlags = flags;
-
- if (inParen)
- {
- if (pattern[curInd] == (wchar_t)'?')
- {
- ++curInd;
- --groupCount;
- if (pattern[curInd] == (wchar_t)':') { noncap = 1; ++curInd; grc = --nonCapGroupCount; }
- else if (pattern[curInd] == (wchar_t)'=') { ++curInd; ahead = 1; pos = 1; }
- else if (pattern[curInd] == (wchar_t)'!') { ++curInd; ahead = 1; pos = 0; }
- else if (pattern.substr(curInd, 2) == L"<=") { curInd += 2; return parseBehind(1, end); }
- else if (pattern.substr(curInd, 2) == L"<!") { curInd += 2; return parseBehind(0, end); }
- else if (pattern[curInd] == (wchar_t)'>') { ++curInd; indep = 1; }
- else
- {
- bool negate = false, done = false;
- while (!done)
- {
- if (curInd >= (int)pattern.size())
- {
- raiseError();
- return NULL;
- }
- else if (negate)
- {
- switch (pattern[curInd])
- {
- case (wchar_t)'i': flags &= ~WCPattern::CASE_INSENSITIVE; break;
- case (wchar_t)'d': flags &= ~WCPattern::UNIX_LINE_MODE; break;
- case (wchar_t)'m': flags &= ~WCPattern::MULTILINE_MATCHING; break;
- case (wchar_t)'s': flags &= ~WCPattern::DOT_MATCHES_ALL; break;
- case (wchar_t)':': done = true; break;
- case (wchar_t)')':
- ++curInd;
- *end = registerNode(new NFALookBehindUNode(L"", true));
- return *end;
- case (wchar_t)'-':
- default: raiseError(); return NULL;
- }
- }
- else
- {
- switch (pattern[curInd])
- {
- case (wchar_t)'i': flags |= WCPattern::CASE_INSENSITIVE; break;
- case (wchar_t)'d': flags |= WCPattern::UNIX_LINE_MODE; break;
- case (wchar_t)'m': flags |= WCPattern::MULTILINE_MATCHING; break;
- case (wchar_t)'s': flags |= WCPattern::DOT_MATCHES_ALL; break;
- case (wchar_t)':': done = true; break;
- case (wchar_t)'-': negate = true; break;
- case (wchar_t)')':
- ++curInd;
- *end = registerNode(new NFALookBehindUNode(L"", true));
- return *end;
- default: raiseError(); return NULL;
- }
- }
- ++curInd;
- }
- noncap = 1;
- grc = --nonCapGroupCount;
- }
-
- if (noncap) cur = start = registerNode(new NFAGroupHeadUNode(grc));
- else cur = start = registerNode(new NFASubStartUNode);
- }
- else cur = start = registerNode(new NFAGroupHeadUNode(grc));
- }
- else cur = start = registerNode(new NFASubStartUNode);
- while (curInd < (int)pattern.size())
- {
- wchar_t ch = pattern[curInd++];
-
- next = NULL;
- if (error) return NULL;
- switch (ch)
- {
- case (wchar_t)'^':
- if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineUNode);
- else next = registerNode(new NFAStartOfInputUNode);
- break;
- case (wchar_t)'$':
- if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineUNode);
- else next = registerNode(new NFAEndOfInputUNode(0));
- break;
- case (wchar_t)'|':
- --groupCount;
- cur->next = registerNode(new NFAAcceptUNode);
- cur = start = registerNode(new NFAOrUNode(start, parse(inParen, 1)));
- break;
- case (wchar_t)'\\':
- if (curInd < (int)pattern.size())
- {
- bool eoi = 0;
- switch (pattern[curInd])
- {
- case (wchar_t)'1':
- case (wchar_t)'2':
- case (wchar_t)'3':
- case (wchar_t)'4':
- case (wchar_t)'5':
- case (wchar_t)'6':
- case (wchar_t)'7':
- case (wchar_t)'8':
- case (wchar_t)'9': next = parseBackref(); break;
- case (wchar_t)'A': ++curInd; next = registerNode(new NFAStartOfInputUNode); break;
- case (wchar_t)'B': ++curInd; next = registerNode(new NFAWordBoundaryUNode(0)); break;
- case (wchar_t)'b': ++curInd; next = registerNode(new NFAWordBoundaryUNode(1)); break;
- case (wchar_t)'G': ++curInd; next = registerNode(new NFAEndOfMatchUNode); break;
- case (wchar_t)'Z': eoi = 1;
- case (wchar_t)'z': ++curInd; next = registerNode(new NFAEndOfInputUNode(eoi)); break;
- default:
- t = parseEscape(inv, quo);
- //printf("inv quo classes { %c %c %s }\n", inv ? (wchar_t)'t' : (wchar_t)'f', quo ? (wchar_t)'t' : (wchar_t)'f', t.c_str());
- if (!quo)
- {
- if (t.size() > 1 || inv)
- {
- if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassUNode(t, inv));
- else next = registerNode(new NFAClassUNode(t, inv));
- }
- else
- {
- next = registerNode(new NFACharUNode(t[0]));
- }
- }
- else
- {
- next = parseQuote();
- }
- }
- }
- else raiseError();
- break;
- case (wchar_t)'[':
- if ((flags & WCPattern::CASE_INSENSITIVE) == 0)
- {
- NFAClassUNode * clazz = new NFAClassUNode();
- bkstring s = parseClass();
- for (int i = 0; i < (int)s.size(); ++i) clazz->vals[s[i]] = 1;
- next = registerNode(clazz);
- }
- else
- {
- NFACIClassUNode * clazz = new NFACIClassUNode();
- bkstring s = parseClass();
- for (int i = 0; i < (int)s.size(); ++i) clazz->vals[to_lower(s[i])] = 1;
- next = registerNode(clazz);
- }
- break;
- case (wchar_t)'.':
- {
- bool useN = 1, useR = 1;
- NFAClassUNode * clazz = new NFAClassUNode(1);
- if ((flags & WCPattern::UNIX_LINE_MODE) != 0) useR = 0;
- if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
- if (useN) clazz->vals[(wchar_t)'\n'] = 1;
- if (useR) clazz->vals[(wchar_t)'\r'] = 1;
- next = registerNode(clazz);
- }
- break;
- case (wchar_t)'(':
- {
- NFAUNode * end, * t1, * t2;
- t1 = parse(1, 0, &end);
- if (!t1) raiseError();
- else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL)
- {
- cur->next = t2;
- cur = t2->next;
- }
- else
- {
- cur->next = t1;
- cur = end;
- }
- }
- break;
- case (wchar_t)')':
- if (!inParen) raiseError();
- else if (inOr)
- {
- --curInd;
- cur = cur->next = registerNode(new NFAAcceptUNode);
- flags = oldFlags;
- return start;
- }
- else
- {
- if (ahead)
- {
- cur = cur->next = registerNode(new NFAAcceptUNode);
- flags = oldFlags;
- return *end = registerNode(new NFALookAheadUNode(start, pos));
- }
- else if (indep)
- {
- cur = cur->next = registerNode(new NFAAcceptUNode);
- flags = oldFlags;
- return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1));
- }
- else // capping or noncapping, it doesnt matter
- {
- *end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc));
- next = quantifyGroup(start, *end, grc);
- if (next)
- {
- start = next;
- *end = next->next;
- }
- flags = oldFlags;
- return start;
- }
- }
- break;
- case (wchar_t)'{': // registered pattern
- cur->next = parseRegisteredWCPattern(&next);
- if (cur->next) cur = next;
- break;
- case (wchar_t)'*':
- case (wchar_t)'+':
- case (wchar_t)'?':
-// case (wchar_t)'}':
-// case (wchar_t)']':
- raiseError();
- break;
- default:
- if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch));
- else next = registerNode(new NFACharUNode(ch));
- break;
- }
- if (next) cur = cur->next = quantify(next);
- }
- if (inParen) raiseError();
- else
- {
- if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode);
- if (end) *end = cur;
- }
-
- flags = oldFlags;
- if (error) return NULL;
-
- return start;
-}
-
-WCPattern * WCPattern::compile(const bkstring & pattern, const unsigned long mode)
-{
- WCPattern * p = new WCPattern(pattern);
- NFAUNode * end;
-
- p->flags = mode;
- if ((mode & WCPattern::LITERAL) != 0)
- {
- p->head = p->registerNode(new NFAStartUNode);
- if ((mode & WCPattern::CASE_INSENSITIVE) != 0) p->head->next = p->registerNode(new NFACIQuoteUNode(pattern));
- else p->head->next = p->registerNode(new NFAQuoteUNode(pattern));
- p->head->next->next = p->registerNode(new NFAEndUNode);
- }
- else
- {
- p->head = p->parse(0, 0, &end);
- if (!p->head)
- {
- delete p;
- p = NULL;
- }
- else
- {
- if (!(p->head && p->head->isStartOfInputNode()))
- {
- NFAUNode * n = p->registerNode(new NFAStartUNode);
- n->next = p->head;
- p->head = n;
- }
- end->next = p->registerNode(new NFAEndUNode);
- }
- }
- if (p != NULL)
- {
- p->matcher = new WCMatcher(p, L"");
- }
-
- return p;
-}
-
-WCPattern * WCPattern::compileAndKeep(const bkstring & pattern, const unsigned long mode)
-{
- WCPattern * ret = NULL;
- std::map<bkstring, WCPattern*>::iterator it = compiledWCPatterns.find(pattern);
-
- if (it != compiledWCPatterns.end())
- {
- ret = it->second;
- }
- else
- {
- ret = compile(pattern, mode);
- compiledWCPatterns[pattern] = ret;
- }
-
- return ret;
-}
-bkstring WCPattern::replace(const bkstring & pattern, const bkstring & str,
- const bkstring & replacementText, const unsigned long mode)
-{
- bkstring ret;
- WCPattern * p = WCPattern::compile(pattern, mode);
- if (p)
- {
- ret = p->replace(str, replacementText);
- delete p;
- }
- return ret;
-}
-
-std::vector<bkstring> WCPattern::split(const bkstring & pattern, const bkstring & str, const bool keepEmptys,
- const unsigned long limit, const unsigned long mode)
-{
- std::vector<bkstring> ret;
- WCPattern * p = WCPattern::compile(pattern, mode);
- if (p)
- {
- ret = p->split(str, keepEmptys, limit);
- delete p;
- }
- return ret;
-}
-
-std::vector<bkstring> WCPattern::findAll(const bkstring & pattern, const bkstring & str, const unsigned long mode)
-{
- std::vector<bkstring> ret;
- WCPattern * p = WCPattern::compile(pattern, mode);
- if (p)
- {
- ret = p->findAll(str);
- delete p;
- }
- return ret;
-}
-
-bool WCPattern::matches(const bkstring & pattern, const bkstring & str, const unsigned long mode)
-{
- bool ret = 0;
- WCPattern * p = compile(pattern, mode);
-
- if (p)
- {
- ret = p->matches(str);
- delete p;
- }
-
- return ret;
-}
-
-bool WCPattern::registerWCPattern(const bkstring & name, const bkstring & pattern, const unsigned long mode)
-{
- WCPattern * p = WCPattern::compile(pattern, mode);
- if (!p) return 0;
- WCPattern::registeredWCPatterns[name] = std::make_pair(pattern, mode);
- delete p;
- return 1;
+ NFAUNode * start, *cur, *next = NULL;
+ CMString t;
+ int grc = groupCount++;
+ bool inv, quo;
+ bool ahead = 0, pos = 0, noncap = 0, indep = 0;
+ unsigned long oldFlags = flags;
+
+ if (inParen)
+ {
+ if (pattern[curInd] == '?')
+ {
+ ++curInd;
+ --groupCount;
+ if (pattern[curInd] == ':') { noncap = 1; ++curInd; grc = --nonCapGroupCount; }
+ else if (pattern[curInd] == '=') { ++curInd; ahead = 1; pos = 1; }
+ else if (pattern[curInd] == '!') { ++curInd; ahead = 1; pos = 0; }
+ else if (pattern.Mid(curInd, 2) == L"<=") { curInd += 2; return parseBehind(1, end); }
+ else if (pattern.Mid(curInd, 2) == L"<!") { curInd += 2; return parseBehind(0, end); }
+ else if (pattern[curInd] == '>') { ++curInd; indep = 1; }
+ else
+ {
+ bool negate = false, done = false;
+ while (!done)
+ {
+ if (curInd >= pattern.GetLength())
+ {
+ raiseError();
+ return NULL;
+ }
+ else if (negate)
+ {
+ switch (pattern[curInd])
+ {
+ case 'i': flags &= ~WCPattern::CASE_INSENSITIVE; break;
+ case 'd': flags &= ~WCPattern::UNIX_LINE_MODE; break;
+ case 'm': flags &= ~WCPattern::MULTILINE_MATCHING; break;
+ case 's': flags &= ~WCPattern::DOT_MATCHES_ALL; break;
+ case ':': done = true; break;
+ case ')':
+ ++curInd;
+ *end = registerNode(new NFALookBehindUNode(L"", true));
+ return *end;
+ case '-':
+ default: raiseError(); return NULL;
+ }
+ }
+ else
+ {
+ switch (pattern[curInd])
+ {
+ case 'i': flags |= WCPattern::CASE_INSENSITIVE; break;
+ case 'd': flags |= WCPattern::UNIX_LINE_MODE; break;
+ case 'm': flags |= WCPattern::MULTILINE_MATCHING; break;
+ case 's': flags |= WCPattern::DOT_MATCHES_ALL; break;
+ case ':': done = true; break;
+ case '-': negate = true; break;
+ case ')':
+ ++curInd;
+ *end = registerNode(new NFALookBehindUNode(L"", true));
+ return *end;
+ default: raiseError(); return NULL;
+ }
+ }
+ ++curInd;
+ }
+ noncap = 1;
+ grc = --nonCapGroupCount;
+ }
+
+ if (noncap) cur = start = registerNode(new NFAGroupHeadUNode(grc));
+ else cur = start = registerNode(new NFASubStartUNode);
+ }
+ else cur = start = registerNode(new NFAGroupHeadUNode(grc));
+ }
+ else cur = start = registerNode(new NFASubStartUNode);
+ while (curInd < pattern.GetLength())
+ {
+ wchar_t ch = pattern[curInd++];
+
+ next = NULL;
+ if (error) return NULL;
+ switch (ch) {
+ case '^':
+ if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineUNode);
+ else next = registerNode(new NFAStartOfInputUNode);
+ break;
+ case '$':
+ if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineUNode);
+ else next = registerNode(new NFAEndOfInputUNode(0));
+ break;
+ case '|':
+ --groupCount;
+ cur->next = registerNode(new NFAAcceptUNode);
+ cur = start = registerNode(new NFAOrUNode(start, parse(inParen, 1)));
+ break;
+ case '\\':
+ if (curInd < pattern.GetLength()) {
+ bool eoi = 0;
+ switch (pattern[curInd]) {
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': next = parseBackref(); break;
+ case 'A': ++curInd; next = registerNode(new NFAStartOfInputUNode); break;
+ case 'B': ++curInd; next = registerNode(new NFAWordBoundaryUNode(0)); break;
+ case 'b': ++curInd; next = registerNode(new NFAWordBoundaryUNode(1)); break;
+ case 'G': ++curInd; next = registerNode(new NFAEndOfMatchUNode); break;
+ case 'Z': eoi = 1;
+ case 'z': ++curInd; next = registerNode(new NFAEndOfInputUNode(eoi)); break;
+ default:
+ t = parseEscape(inv, quo);
+ //printf("inv quo classes { %c %c %s }\n", inv ? 't' : 'f', quo ? 't' : 'f', t.c_str());
+ if (!quo) {
+ if (t.GetLength() > 1 || inv) {
+ if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassUNode(t, inv));
+ else next = registerNode(new NFAClassUNode(t, inv));
+ }
+ else next = registerNode(new NFACharUNode(t[0]));
+ }
+ else next = parseQuote();
+ }
+ }
+ else raiseError();
+ break;
+ case '[':
+ if ((flags & WCPattern::CASE_INSENSITIVE) == 0) {
+ NFAClassUNode * clazz = new NFAClassUNode();
+ CMString s = parseClass();
+ for (int i = 0; i < (int)s.GetLength(); ++i) clazz->vals[s[i]] = 1;
+ next = registerNode(clazz);
+ }
+ else {
+ NFACIClassUNode * clazz = new NFACIClassUNode();
+ CMString s = parseClass();
+ for (int i = 0; i < s.GetLength(); ++i) clazz->vals[to_lower(s[i])] = 1;
+ next = registerNode(clazz);
+ }
+ break;
+ case '.':
+ {
+ bool useN = 1, useR = 1;
+ NFAClassUNode * clazz = new NFAClassUNode(1);
+ if ((flags & WCPattern::UNIX_LINE_MODE) != 0) useR = 0;
+ if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
+ if (useN) clazz->vals['\n'] = 1;
+ if (useR) clazz->vals['\r'] = 1;
+ next = registerNode(clazz);
+ }
+ break;
+ case '(':
+ {
+ NFAUNode *end, *t1, *t2;
+ t1 = parse(1, 0, &end);
+ if (!t1) raiseError();
+ else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL) {
+ cur->next = t2;
+ cur = t2->next;
+ }
+ else {
+ cur->next = t1;
+ cur = end;
+ }
+ }
+ break;
+ case ')':
+ if (!inParen) raiseError();
+ else if (inOr)
+ {
+ --curInd;
+ cur = cur->next = registerNode(new NFAAcceptUNode);
+ flags = oldFlags;
+ return start;
+ }
+ else
+ {
+ if (ahead)
+ {
+ cur = cur->next = registerNode(new NFAAcceptUNode);
+ flags = oldFlags;
+ return *end = registerNode(new NFALookAheadUNode(start, pos));
+ }
+ else if (indep)
+ {
+ cur = cur->next = registerNode(new NFAAcceptUNode);
+ flags = oldFlags;
+ return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1));
+ }
+ else // capping or noncapping, it doesnt matter
+ {
+ *end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc));
+ next = quantifyGroup(start, *end, grc);
+ if (next)
+ {
+ start = next;
+ *end = next->next;
+ }
+ flags = oldFlags;
+ return start;
+ }
+ }
+ break;
+ case '{': // registered pattern
+ cur->next = parseRegisteredWCPattern(&next);
+ if (cur->next) cur = next;
+ break;
+ case '*':
+ case '+':
+ case '?':
+ // case '}':
+ // case ']':
+ raiseError();
+ break;
+ default:
+ if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch));
+ else next = registerNode(new NFACharUNode(ch));
+ break;
+ }
+ if (next) cur = cur->next = quantify(next);
+ }
+ if (inParen) raiseError();
+ else
+ {
+ if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode);
+ if (end) *end = cur;
+ }
+
+ flags = oldFlags;
+ if (error) return NULL;
+
+ return start;
+}
+
+WCPattern * WCPattern::compile(const CMString & pattern, const unsigned long mode)
+{
+ WCPattern * p = new WCPattern(pattern);
+ NFAUNode * end;
+
+ p->flags = mode;
+ if ((mode & WCPattern::LITERAL) != 0) {
+ p->head = p->registerNode(new NFAStartUNode);
+ if ((mode & WCPattern::CASE_INSENSITIVE) != 0) p->head->next = p->registerNode(new NFACIQuoteUNode(pattern));
+ else p->head->next = p->registerNode(new NFAQuoteUNode(pattern));
+ p->head->next->next = p->registerNode(new NFAEndUNode);
+ }
+ else {
+ p->head = p->parse(0, 0, &end);
+ if (!p->head) {
+ delete p;
+ p = NULL;
+ }
+ else {
+ if (!(p->head && p->head->isStartOfInputNode())) {
+ NFAUNode * n = p->registerNode(new NFAStartUNode);
+ n->next = p->head;
+ p->head = n;
+ }
+ end->next = p->registerNode(new NFAEndUNode);
+ }
+ }
+
+ if (p != NULL)
+ p->matcher = new WCMatcher(p, L"");
+
+ return p;
+}
+
+WCPattern * WCPattern::compileAndKeep(const CMString & pattern, const unsigned long mode)
+{
+ WCPattern * ret = NULL;
+ std::map<CMString, WCPattern*>::iterator it = compiledWCPatterns.find(pattern);
+ if (it != compiledWCPatterns.end())
+ ret = it->second;
+ else {
+ ret = compile(pattern, mode);
+ compiledWCPatterns[pattern] = ret;
+ }
+
+ return ret;
+}
+CMString WCPattern::replace(const CMString & pattern, const CMString & str,
+ const CMString & replacementText, const unsigned long mode)
+{
+ CMString ret;
+ WCPattern * p = WCPattern::compile(pattern, mode);
+ if (p)
+ {
+ ret = p->replace(str, replacementText);
+ delete p;
+ }
+ return ret;
+}
+
+std::vector<CMString> WCPattern::split(const CMString & pattern, const CMString & str, const bool keepEmptys,
+ const unsigned long limit, const unsigned long mode)
+{
+ std::vector<CMString> ret;
+ WCPattern *p = WCPattern::compile(pattern, mode);
+ if (p) {
+ ret = p->split(str, keepEmptys, limit);
+ delete p;
+ }
+ return ret;
+}
+
+std::vector<CMString> WCPattern::findAll(const CMString & pattern, const CMString & str, const unsigned long mode)
+{
+ std::vector<CMString> ret;
+ WCPattern *p = WCPattern::compile(pattern, mode);
+ if (p) {
+ ret = p->findAll(str);
+ delete p;
+ }
+ return ret;
+}
+
+bool WCPattern::matches(const CMString & pattern, const CMString & str, const unsigned long mode)
+{
+ bool ret = 0;
+ WCPattern * p = compile(pattern, mode);
+
+ if (p)
+ {
+ ret = p->matches(str);
+ delete p;
+ }
+
+ return ret;
+}
+
+bool WCPattern::registerWCPattern(const CMString & name, const CMString & pattern, const unsigned long mode)
+{
+ WCPattern * p = WCPattern::compile(pattern, mode);
+ if (!p) return 0;
+ WCPattern::registeredWCPatterns[name] = std::make_pair(pattern, mode);
+ delete p;
+ return 1;
}
void WCPattern::unregisterWCPatterns()
{
- registeredWCPatterns.clear();
+ registeredWCPatterns.clear();
}
void WCPattern::clearWCPatternCache()
{
- std::map<bkstring, WCPattern*>::iterator it;
- for (it = compiledWCPatterns.begin(); it != compiledWCPatterns.end(); ++it)
- {
- delete it->second;
- }
- compiledWCPatterns.clear();
+ std::map<CMString, WCPattern*>::iterator it;
+ for (it = compiledWCPatterns.begin(); it != compiledWCPatterns.end(); ++it)
+ delete it->second;
+
+ compiledWCPatterns.clear();
}
-std::pair<bkstring, int> WCPattern::findNthMatch(const bkstring & pattern, const bkstring & str,
- const int matchNum, const unsigned long mode)
+std::pair<CMString, int> WCPattern::findNthMatch(const CMString & pattern, const CMString & str,
+ const int matchNum, const unsigned long mode)
{
- std::pair<bkstring, int> ret;
- WCPattern * p = WCPattern::compile(pattern, mode);
+ std::pair<CMString, int> ret;
+ WCPattern * p = WCPattern::compile(pattern, mode);
- ret.second = -1;
- if (p)
- {
- int i = -1;
- p->matcher->setString(str);
- while (i < matchNum && p->matcher->findNextMatch()) { ++i; }
- if (i == matchNum && p->matcher->getStartingIndex() >= 0)
- {
- ret.first = p->matcher->getGroup(0);
- ret.second = p->matcher->getStartingIndex();
- }
- delete p;
- }
+ ret.second = -1;
+ if (p)
+ {
+ int i = -1;
+ p->matcher->setString(str);
+ while (i < matchNum && p->matcher->findNextMatch()) { ++i; }
+ if (i == matchNum && p->matcher->getStartingIndex() >= 0)
+ {
+ ret.first = p->matcher->getGroup(0);
+ ret.second = p->matcher->getStartingIndex();
+ }
+ delete p;
+ }
- return ret;
+ return ret;
}
WCPattern::~WCPattern()
{
- /*
- nodes.clear();
- if (head) head->findAllNodes(nodes);
- */
- if (matcher) delete matcher;
- for (std::map<NFAUNode*, bool>::iterator it = nodes.begin(); it != nodes.end(); ++it) delete it->first;
+ /*
+ nodes.Empty();
+ if (head) head->findAllNodes(nodes);
+ */
+ if (matcher) delete matcher;
+ for (std::map<NFAUNode*, bool>::iterator it = nodes.begin(); it != nodes.end(); ++it)
+ delete it->first;
}
-bkstring WCPattern::replace(const bkstring & str, const bkstring & replacementText)
+CMString WCPattern::replace(const CMString & str, const CMString & replacementText)
{
- int li = 0;
- bkstring ret = L"";
+ int li = 0;
+ CMString ret = L"";
- matcher->setString(str);
- while (matcher->findNextMatch())
- {
- ret += str.substr(li, matcher->getStartingIndex() - li);
- ret += matcher->replaceWithGroups(replacementText);
- li = matcher->getEndingIndex();
- }
- ret += str.substr(li);
+ matcher->setString(str);
+ while (matcher->findNextMatch())
+ {
+ ret += str.Mid(li, matcher->getStartingIndex() - li);
+ ret += matcher->replaceWithGroups(replacementText);
+ li = matcher->getEndingIndex();
+ }
+ ret += str.Mid(li);
- return ret;
+ return ret;
}
-std::vector<bkstring> WCPattern::split(const bkstring & str, const bool keepEmptys, const unsigned long limit)
+std::vector<CMString> WCPattern::split(const CMString & str, const bool keepEmptys, const unsigned long limit)
{
- unsigned long lim = (limit == 0 ? MAX_QMATCH : limit);
- int li = 0;
- std::vector<bkstring> ret;
+ unsigned long lim = (limit == 0 ? MAX_QMATCH : limit);
+ int li = 0;
+ std::vector<CMString> ret;
- matcher->setString(str);
+ matcher->setString(str);
- while (matcher->findNextMatch() && ret.size() < lim)
- {
- if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back(L"");
- if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys)
- {
- if (li != matcher->getStartingIndex() || keepEmptys)
- {
- ret.push_back(str.substr(li, matcher->getStartingIndex() - li));
- }
- li = matcher->getEndingIndex();
- }
- }
- if (li < (int)str.size()) ret.push_back(str.substr(li));
+ while (matcher->findNextMatch() && ret.size() < lim)
+ {
+ if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back(L"");
+ if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys)
+ {
+ if (li != matcher->getStartingIndex() || keepEmptys)
+ {
+ ret.push_back(str.Mid(li, matcher->getStartingIndex() - li));
+ }
+ li = matcher->getEndingIndex();
+ }
+ }
+ if (li < str.GetLength()) ret.push_back(str.Mid(li));
- return ret;
+ return ret;
}
-std::vector<bkstring> WCPattern::findAll(const bkstring & str)
+std::vector<CMString> WCPattern::findAll(const CMString & str)
{
- matcher->setString(str);
- return matcher->findAll();
+ matcher->setString(str);
+ return matcher->findAll();
}
-bool WCPattern::matches(const bkstring & str)
+bool WCPattern::matches(const CMString & str)
{
- matcher->setString(str);
- return matcher->matches();
+ matcher->setString(str);
+ return matcher->matches();
}
unsigned long WCPattern::getFlags() const
{
- return flags;
+ return flags;
}
-bkstring WCPattern::getWCPattern() const
+CMString WCPattern::getWCPattern() const
{
- return pattern;
+ return pattern;
}
-WCMatcher * WCPattern::createWCMatcher(const bkstring & str)
+WCMatcher * WCPattern::createWCMatcher(const CMString & str)
{
- return new WCMatcher(this, str);
+ return new WCMatcher(this, str);
}
// NFAUNode
@@ -1196,227 +1133,233 @@ NFAUNode::NFAUNode() { next = NULL; }
NFAUNode::~NFAUNode() { }
void NFAUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
- if (soFar.find(this) == soFar.end()) return;
- soFar[this] = 1;
- if (next) next->findAllNodes(soFar);
+ if (soFar.find(this) == soFar.end()) return;
+ soFar[this] = 1;
+ if (next) next->findAllNodes(soFar);
}
// NFACharUNode
NFACharUNode::NFACharUNode(const wchar_t c) { ch = c; }
-int NFACharUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFACharUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd < (int)str.size() && str[curInd] == ch) return next->match(str, matcher, curInd + 1);
- return -1;
+ if (curInd < str.GetLength() && str[curInd] == ch)
+ return next->match(str, matcher, curInd + 1);
+ return -1;
}
// NFACICharUNode
NFACICharUNode::NFACICharUNode(const wchar_t c) { ch = to_lower(c); }
-int NFACICharUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFACICharUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd < (int)str.size() && to_lower(str[curInd]) == ch) return next->match(str, matcher, curInd + 1);
- return -1;
+ if (curInd < str.GetLength() && to_lower(str[curInd]) == ch)
+ return next->match(str, matcher, curInd + 1);
+ return -1;
}
// NFAStartUNode
NFAStartUNode::NFAStartUNode() { }
-int NFAStartUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
-{
- int ret = -1, ci = curInd;
-
- matcher->starts[0] = curInd;
- if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) == (unsigned int)WCMatcher::MATCH_ENTIRE_STRING)
- {
- if (curInd != 0)
- {
- matcher->starts[0] = -1;
- return -1;
- }
- return next->match(str, matcher, 0);
- }
- while ((ret = next->match(str, matcher, ci)) == -1 && ci < (int)str.size())
- {
- matcher->clearGroups();
- matcher->starts[0] = ++ci;
- }
- if (ret < 0) matcher->starts[0] = -1;
- return ret;
+int NFAStartUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
+{
+ int ret = -1, ci = curInd;
+
+ matcher->starts[0] = curInd;
+ if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) == (unsigned int)WCMatcher::MATCH_ENTIRE_STRING)
+ {
+ if (curInd != 0)
+ {
+ matcher->starts[0] = -1;
+ return -1;
+ }
+ return next->match(str, matcher, 0);
+ }
+ while ((ret = next->match(str, matcher, ci)) == -1 && ci < str.GetLength())
+ {
+ matcher->clearGroups();
+ matcher->starts[0] = ++ci;
+ }
+ if (ret < 0) matcher->starts[0] = -1;
+ return ret;
}
// NFAEndUNode
NFAEndUNode::NFAEndUNode() { }
-int NFAEndUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAEndUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- matcher->ends[0] = curInd;
- if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) != 0)
- {
- if (curInd == (int)str.size()) return curInd;
- matcher->ends[0] = -1;
- return -1;
- }
- return curInd;
+ matcher->ends[0] = curInd;
+ if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) != 0)
+ {
+ if (curInd == str.GetLength())
+ return curInd;
+ matcher->ends[0] = -1;
+ return -1;
+ }
+ return curInd;
}
// NFAQuantifierUNode
void NFAQuantifierUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
- inner->findAllNodes(soFar);
- NFAUNode::findAllNodes(soFar);
+ inner->findAllNodes(soFar);
+ NFAUNode::findAllNodes(soFar);
}
NFAQuantifierUNode::NFAQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
{
- inner = internal;
- inner->next = pat->registerNode(new NFAAcceptUNode);
- min = (minMatch < WCPattern::MIN_QMATCH) ? WCPattern::MIN_QMATCH : minMatch;
- max = (maxMatch > WCPattern::MAX_QMATCH) ? WCPattern::MAX_QMATCH : maxMatch;
+ inner = internal;
+ inner->next = pat->registerNode(new NFAAcceptUNode);
+ min = (minMatch < WCPattern::MIN_QMATCH) ? WCPattern::MIN_QMATCH : minMatch;
+ max = (maxMatch > WCPattern::MAX_QMATCH) ? WCPattern::MAX_QMATCH : maxMatch;
}
-int NFAQuantifierUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAQuantifierUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int i0, i1, i2 = 0;
+ int i0, i1, i2 = 0;
- i0 = i1 = curInd;
- while (i2 < min)
- {
+ i0 = i1 = curInd;
+ while (i2 < min)
+ {
- ++i2;
- i1 = inner->match(str, matcher, i0);
- if (i1 <= i0) return i1; // i1 < i0 means i1 is -1
- i0 = i1;
- }
+ ++i2;
+ i1 = inner->match(str, matcher, i0);
+ if (i1 <= i0) return i1; // i1 < i0 means i1 is -1
+ i0 = i1;
+ }
- return i1;
+ return i1;
}
// NFAGreedyQuantifierUNode
NFAGreedyQuantifierUNode::NFAGreedyQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
- : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
-int NFAGreedyQuantifierUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+: NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
+int NFAGreedyQuantifierUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int t = NFAQuantifierUNode::match(str, matcher, curInd);
- if (t != -1) return matchInternal(str, matcher, t, min);
- return t;
+ int t = NFAQuantifierUNode::match(str, matcher, curInd);
+ if (t != -1) return matchInternal(str, matcher, t, min);
+ return t;
}
-int NFAGreedyQuantifierUNode::matchInternal(const bkstring & str, WCMatcher * matcher, const int curInd, const int soFar) const
+int NFAGreedyQuantifierUNode::matchInternal(const CMString & str, WCMatcher * matcher, const int curInd, const int soFar) const
{
- if (soFar >= max) return next->match(str, matcher, curInd);
+ if (soFar >= max) return next->match(str, matcher, curInd);
- int i, j;
+ int i, j;
- i = inner->match(str, matcher, curInd);
- if (i != -1)
- {
- j = matchInternal(str, matcher, i, soFar + 1);
- if (j != -1) return j;
- }
- return next->match(str, matcher, curInd);
+ i = inner->match(str, matcher, curInd);
+ if (i != -1)
+ {
+ j = matchInternal(str, matcher, i, soFar + 1);
+ if (j != -1) return j;
+ }
+ return next->match(str, matcher, curInd);
}
// NFALazyQuantifierUNode
NFALazyQuantifierUNode::NFALazyQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
- : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
-int NFALazyQuantifierUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
-{
- int i, j, m = NFAQuantifierUNode::match(str, matcher, curInd);
-
- if (m == -1) return -1;
-
- for (i = min; i < max; ++i)
- {
- j = next->match(str, matcher, m);
- if (j == -1)
- {
- j = inner->match(str, matcher, m);
- // if j < m, then j is -1, so we bail.
- // if j == m, then we would just go and call next->match on the same index,
- // but it already failed trying to match right there, so we know we can
- // just bail
- if (j <= m) return -1;
- m = j;
- }
- else return j;
- }
- return next->match(str, matcher, m);
+: NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
+int NFALazyQuantifierUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
+{
+ int i, j, m = NFAQuantifierUNode::match(str, matcher, curInd);
+
+ if (m == -1) return -1;
+
+ for (i = min; i < max; ++i)
+ {
+ j = next->match(str, matcher, m);
+ if (j == -1)
+ {
+ j = inner->match(str, matcher, m);
+ // if j < m, then j is -1, so we bail.
+ // if j == m, then we would just go and call next->match on the same index,
+ // but it already failed trying to match right there, so we know we can
+ // just bail
+ if (j <= m) return -1;
+ m = j;
+ }
+ else return j;
+ }
+ return next->match(str, matcher, m);
}
// NFAPossessiveQuantifierUNode
NFAPossessiveQuantifierUNode::NFAPossessiveQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
- : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
-int NFAPossessiveQuantifierUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+: NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { }
+int NFAPossessiveQuantifierUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int i, j, m = NFAQuantifierUNode::match(str, matcher, curInd);
+ int i, j, m = NFAQuantifierUNode::match(str, matcher, curInd);
- if (m == -1) return -1;
- for (i = min; i < max; ++i)
- {
- j = inner->match(str, matcher, m);
- if (j <= m) return next->match(str, matcher, m);
- m = j;
- }
- return next->match(str, matcher, m);
+ if (m == -1) return -1;
+ for (i = min; i < max; ++i)
+ {
+ j = inner->match(str, matcher, m);
+ if (j <= m) return next->match(str, matcher, m);
+ m = j;
+ }
+ return next->match(str, matcher, m);
}
// NFAAcceptUNode
NFAAcceptUNode::NFAAcceptUNode() { }
-int NFAAcceptUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAAcceptUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (!next) return curInd;
- else return next->match(str, matcher, curInd);
+ if (!next) return curInd;
+ else return next->match(str, matcher, curInd);
}
// NFAClassUNode
NFAClassUNode::NFAClassUNode(const bool invert)
{
- inv = invert;
+ inv = invert;
}
-NFAClassUNode::NFAClassUNode(const bkstring & clazz, const bool invert)
+NFAClassUNode::NFAClassUNode(const CMString & clazz, const bool invert)
{
- inv = invert;
- for (int i = 0; i < (int)clazz.size(); ++i) vals[clazz[i]] = 1;
+ inv = invert;
+ for (int i = 0; i < clazz.GetLength(); ++i)
+ vals[clazz[i]] = 1;
}
-int NFAClassUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAClassUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd < (int)str.size() && ((vals.find(str[curInd]) != vals.end()) ^ inv))
- {
- return next->match(str, matcher, curInd + 1);
- }
- return -1;
+ if (curInd < str.GetLength() && ((vals.find(str[curInd]) != vals.end()) ^ inv))
+ {
+ return next->match(str, matcher, curInd + 1);
+ }
+ return -1;
}
// NFACIClassUNode
NFACIClassUNode::NFACIClassUNode(const bool invert)
{
- inv = invert;
+ inv = invert;
}
-NFACIClassUNode::NFACIClassUNode(const bkstring & clazz, const bool invert)
+
+NFACIClassUNode::NFACIClassUNode(const CMString & clazz, const bool invert)
{
- inv = invert;
- for (int i = 0; i < (int)clazz.size(); ++i) vals[to_lower(clazz[i])] = 1;
+ inv = invert;
+ for (int i = 0; i < (int)clazz.GetLength(); ++i)
+ vals[to_lower(clazz[i])] = 1;
}
-int NFACIClassUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+
+int NFACIClassUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd < (int)str.size() && ((vals.find(to_lower(str[curInd])) != vals.end()) ^ inv))
- {
- return next->match(str, matcher, curInd + 1);
- }
- return -1;
+ if (curInd < str.GetLength() && ((vals.find(to_lower(str[curInd])) != vals.end()) ^ inv))
+ return next->match(str, matcher, curInd + 1);
+
+ return -1;
}
// NFASubStartUNode
NFASubStartUNode::NFASubStartUNode() { }
-int NFASubStartUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFASubStartUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- return next->match(str, matcher, curInd);
+ return next->match(str, matcher, curInd);
}
// NFAOrUNode
@@ -1424,39 +1367,39 @@ int NFASubStartUNode::match(const bkstring & str, WCMatcher * matcher, const int
NFAOrUNode::NFAOrUNode(NFAUNode * first, NFAUNode * second) : one(first), two(second) { }
void NFAOrUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
- if (one) one->findAllNodes(soFar);
- if (two) two->findAllNodes(soFar);
- NFAUNode::findAllNodes(soFar);
+ if (one) one->findAllNodes(soFar);
+ if (two) two->findAllNodes(soFar);
+ NFAUNode::findAllNodes(soFar);
}
-int NFAOrUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAOrUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int ci = one->match(str, matcher, curInd);
+ int ci = one->match(str, matcher, curInd);
- if (ci != -1) ci = next->match(str, matcher, ci);
- if (ci != -1) return ci;
- if (ci == -1) ci = two->match(str, matcher, curInd);
- if (ci != -1) ci = next->match(str, matcher, ci);
- return ci;
+ if (ci != -1) ci = next->match(str, matcher, ci);
+ if (ci != -1) return ci;
+ if (ci == -1) ci = two->match(str, matcher, curInd);
+ if (ci != -1) ci = next->match(str, matcher, ci);
+ return ci;
}
// NFAQuoteUNode
-NFAQuoteUNode::NFAQuoteUNode(const bkstring & quoted) : qStr(quoted) { }
-int NFAQuoteUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+NFAQuoteUNode::NFAQuoteUNode(const CMString & quoted) : qStr(quoted) { }
+int NFAQuoteUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd + qStr.size() > str.size()) return -1;
- if (str.substr(curInd, qStr.size()) != qStr) return -1;
- return next->match(str, matcher, curInd + (int)qStr.size());
+ if (curInd + qStr.GetLength() > str.GetLength()) return -1;
+ if (str.Mid(curInd, qStr.GetLength()) != qStr) return -1;
+ return next->match(str, matcher, curInd + qStr.GetLength());
}
// NFACIQuoteUNode
-NFACIQuoteUNode::NFACIQuoteUNode(const bkstring & quoted) : qStr(quoted) { }
-int NFACIQuoteUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+NFACIQuoteUNode::NFACIQuoteUNode(const CMString & quoted) : qStr(quoted) { }
+int NFACIQuoteUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd + qStr.size() > str.size()) return -1;
- if (str_icmp(str.substr(curInd, qStr.size()).c_str(), qStr.c_str())) return -1;
- return next->match(str, matcher, (int)qStr.size());
+ if (curInd + qStr.GetLength() > str.GetLength()) return -1;
+ if (str_icmp(str.Mid(curInd, qStr.GetLength()).c_str(), qStr.c_str())) return -1;
+ return next->match(str, matcher, qStr.GetLength());
}
// NFALookAheadUNode
@@ -1464,267 +1407,268 @@ int NFACIQuoteUNode::match(const bkstring & str, WCMatcher * matcher, const int
NFALookAheadUNode::NFALookAheadUNode(NFAUNode * internal, const bool positive) : NFAUNode(), pos(positive), inner(internal) { }
void NFALookAheadUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
- if (inner) inner->findAllNodes(soFar);
- NFAUNode::findAllNodes(soFar);
+ if (inner) inner->findAllNodes(soFar);
+ NFAUNode::findAllNodes(soFar);
}
-int NFALookAheadUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFALookAheadUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- return ((inner->match(str, matcher, curInd) == -1) ^ pos) ? next->match(str, matcher, curInd) : -1;
+ return ((inner->match(str, matcher, curInd) == -1) ^ pos) ? next->match(str, matcher, curInd) : -1;
}
// NFALookBehindUNode
-NFALookBehindUNode::NFALookBehindUNode(const bkstring & str, const bool positive) : pos(positive), mStr(str) { }
-int NFALookBehindUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
-{
- if (pos)
- {
- if (curInd < (int)mStr.size()) return -1;
- if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return next->match(str, matcher, curInd);
- }
- else
- {
- if (curInd < (int)mStr.size()) return next->match(str, matcher, curInd);
- if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return -1;
- return next->match(str, matcher, curInd);
- }
- return -1;
+NFALookBehindUNode::NFALookBehindUNode(const CMString & str, const bool positive) : pos(positive), mStr(str) { }
+int NFALookBehindUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
+{
+ if (pos) {
+ if (curInd < mStr.GetLength())
+ return -1;
+ if (str.Mid(curInd - mStr.GetLength(), mStr.GetLength()) == mStr)
+ return next->match(str, matcher, curInd);
+ }
+ else {
+ if (curInd < mStr.GetLength())
+ return next->match(str, matcher, curInd);
+ if (str.Mid(curInd - mStr.GetLength(), mStr.GetLength()) == mStr)
+ return -1;
+ return next->match(str, matcher, curInd);
+ }
+ return -1;
}
// NFAStartOfLineUNode
NFAStartOfLineUNode::NFAStartOfLineUNode() { }
-int NFAStartOfLineUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAStartOfLineUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd == 0 || str[curInd - 1] == (wchar_t)'\n' || str[curInd - 1] == (wchar_t)'\r')
- {
- return next->match(str, matcher, curInd);
- }
- return -1;
+ if (curInd == 0 || str[curInd - 1] == '\n' || str[curInd - 1] == '\r')
+ {
+ return next->match(str, matcher, curInd);
+ }
+ return -1;
}
// NFAEndOfLineUNode
NFAEndOfLineUNode::NFAEndOfLineUNode() { }
-int NFAEndOfLineUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAEndOfLineUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd >= (int)str.size() || str[curInd] == (wchar_t)'\n' || str[curInd] == (wchar_t)'\r')
- {
- return next->match(str, matcher, curInd);
- }
- return -1;
+ if (curInd >= str.GetLength() || str[curInd] == '\n' || str[curInd] == '\r')
+ return next->match(str, matcher, curInd);
+
+ return -1;
}
// NFAReferenceUNode
NFAReferenceUNode::NFAReferenceUNode(const int groupIndex) : gi(groupIndex) { }
-int NFAReferenceUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAReferenceUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int len = matcher->ends[gi] - matcher->starts[gi];
- int ni = -1;
- if (gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0) ni = curInd;
- else if (curInd + len > (int)str.size()) return -1;
- else if (str.substr(curInd, len) != str.substr(matcher->starts[gi], len)) return -1;
- else ni = curInd + len;
+ int len = matcher->ends[gi] - matcher->starts[gi];
+ int ni = -1;
+ if (gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0) ni = curInd;
+ else if (curInd + len > (int)str.GetLength()) return -1;
+ else if (str.Mid(curInd, len) != str.Mid(matcher->starts[gi], len)) return -1;
+ else ni = curInd + len;
- return next->match(str, matcher, ni);
+ return next->match(str, matcher, ni);
}
// NFAStartOfInputUNode
NFAStartOfInputUNode::NFAStartOfInputUNode() { }
-int NFAStartOfInputUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAStartOfInputUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd == 0) return next->match(str, matcher, curInd);
- return -1;
+ if (curInd == 0) return next->match(str, matcher, curInd);
+ return -1;
}
// NFAEndOfInputUNode
NFAEndOfInputUNode::NFAEndOfInputUNode(const bool lookForTerm) : term(lookForTerm) { }
-int NFAEndOfInputUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
-{
- int len = (int)str.size();
- if (curInd == len) return next->match(str, matcher, curInd);
- else if (term)
- {
- if (curInd == len - 1 && (str[curInd] == (wchar_t)'\r' || str[curInd] == (wchar_t)'\n'))
- {
- return next->match(str, matcher, curInd);
- }
- else if (curInd == len - 2 && str.substr(curInd, 2) == L"\r\n")
- {
- return next->match(str, matcher, curInd);
- }
- }
- return -1;
+int NFAEndOfInputUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
+{
+ int len = str.GetLength();
+ if (curInd == len) return next->match(str, matcher, curInd);
+ else if (term)
+ {
+ if (curInd == len - 1 && (str[curInd] == '\r' || str[curInd] == '\n'))
+ {
+ return next->match(str, matcher, curInd);
+ }
+ else if (curInd == len - 2 && str.Mid(curInd, 2) == L"\r\n")
+ {
+ return next->match(str, matcher, curInd);
+ }
+ }
+ return -1;
}
// NFAWordBoundaryUNode
NFAWordBoundaryUNode::NFAWordBoundaryUNode(const bool positive) : pos(positive) { }
-int NFAWordBoundaryUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAWordBoundaryUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int len = (int)str.size();
+ int len = str.GetLength();
- wchar_t c1 = (curInd - 1 < len && curInd > 0) ? str[curInd - 1] : '\n';
- wchar_t c2 = (curInd < len) ? str[curInd ] : '\n';
+ wchar_t c1 = (curInd - 1 < len && curInd > 0) ? str[curInd - 1] : '\n';
+ wchar_t c2 = (curInd < len) ? str[curInd] : '\n';
- if (curInd == len) return next->match(str, matcher, curInd);
- bool ok = is_alpha(c1) != is_alpha(c2);
- if (ok && pos) return next->match(str, matcher, curInd);
- return -1;
+ if (curInd == len) return next->match(str, matcher, curInd);
+ bool ok = is_alpha(c1) != is_alpha(c2);
+ if (ok && pos) return next->match(str, matcher, curInd);
+ return -1;
}
// NFAEndOfMatchUNode
NFAEndOfMatchUNode::NFAEndOfMatchUNode() { }
-int NFAEndOfMatchUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAEndOfMatchUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- if (curInd == matcher->lm) return next->match(str, matcher, curInd);
- return -1;
+ if (curInd == matcher->lm) return next->match(str, matcher, curInd);
+ return -1;
}
// NFAGroupHeadUNode
NFAGroupHeadUNode::NFAGroupHeadUNode(const int groupIndex) : gi(groupIndex) { }
-int NFAGroupHeadUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAGroupHeadUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int ret, o = matcher->starts[gi];
+ int ret, o = matcher->starts[gi];
- matcher->starts[gi] = curInd;
- ret = next->match(str, matcher, curInd);
- if (ret < 0) matcher->starts[gi] = o;
+ matcher->starts[gi] = curInd;
+ ret = next->match(str, matcher, curInd);
+ if (ret < 0) matcher->starts[gi] = o;
- return ret;
+ return ret;
}
// NFAGroupTailUNode
NFAGroupTailUNode::NFAGroupTailUNode(const int groupIndex) : gi(groupIndex) { }
-int NFAGroupTailUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAGroupTailUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int ret, o = matcher->ends[gi];
+ int ret, o = matcher->ends[gi];
- matcher->ends[gi] = curInd;
- ret = next->match(str, matcher, curInd);
- if (ret < 0) matcher->ends[gi] = o;
+ matcher->ends[gi] = curInd;
+ ret = next->match(str, matcher, curInd);
+ if (ret < 0) matcher->ends[gi] = o;
- return ret;
+ return ret;
}
// NFAGroupLoopPrologueUNode
NFAGroupLoopPrologueUNode::NFAGroupLoopPrologueUNode(const int groupIndex) : gi(groupIndex) { }
-int NFAGroupLoopPrologueUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
+int NFAGroupLoopPrologueUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
{
- int ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi];
+ int ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi];
- matcher->groups[gi] = 0;
- matcher->groupPos[gi] = 0;
- matcher->groupIndeces[gi] = -1;
- ret = next->match(str, matcher, curInd);
- if (ret < 0)
- {
- matcher->groups[gi] = o1;
- matcher->groupPos[gi] = o2;
- matcher->groupIndeces[gi] = o3;
- }
+ matcher->groups[gi] = 0;
+ matcher->groupPos[gi] = 0;
+ matcher->groupIndeces[gi] = -1;
+ ret = next->match(str, matcher, curInd);
+ if (ret < 0)
+ {
+ matcher->groups[gi] = o1;
+ matcher->groupPos[gi] = o2;
+ matcher->groupIndeces[gi] = o3;
+ }
- return ret;
+ return ret;
}
// NFAGroupLoopUNode
NFAGroupLoopUNode::NFAGroupLoopUNode(NFAUNode * internal, const int minMatch, const int maxMatch,
- const int groupIndex, const int matchType)
+ const int groupIndex, const int matchType)
{
- inner = internal;
- min = minMatch;
- max = maxMatch;
- gi = groupIndex;
- type = matchType;
+ inner = internal;
+ min = minMatch;
+ max = maxMatch;
+ gi = groupIndex;
+ type = matchType;
}
void NFAGroupLoopUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
- if (inner) inner->findAllNodes(soFar);
- NFAUNode::findAllNodes(soFar);
-}
-int NFAGroupLoopUNode::match(const bkstring & str, WCMatcher * matcher, const int curInd) const
-{
- bool b = (curInd > matcher->groupIndeces[gi]);
-
- if (b && matcher->groups[gi] < min)
- {
- ++matcher->groups[gi];
- int o = matcher->groupIndeces[gi];
- matcher->groupIndeces[gi] = curInd;
- int ret = inner->match(str, matcher, curInd);
- if (ret < 0)
- {
- matcher->groupIndeces[gi] = o;
- --matcher->groups[gi];
- }
- return ret;
- }
- else if (!b || matcher->groups[gi] >= max)
- {
- return next->match(str, matcher, curInd);
- }
- else
- {
- switch (type)
- {
- case 0: return matchGreedy(str, matcher, curInd);
- case 1: return matchLazy(str, matcher, curInd);
- case 2: return matchPossessive(str, matcher, curInd);
- }
- }
- return -1;
-}
-int NFAGroupLoopUNode::matchGreedy(const bkstring & str, WCMatcher * matcher, const int curInd) const
-{
- int o = matcher->groupIndeces[gi]; // save our info for backtracking
- matcher->groupIndeces[gi] = curInd; // move along
- ++matcher->groups[gi];
- int ret = inner->match(str, matcher, curInd); // match internally
- if (ret < 0)
- { // if we failed, then restore info and match next
- --matcher->groups[gi];
- matcher->groupIndeces[gi] = o;
- ret = next->match(str, matcher, curInd);
- }
- return ret;
-}
-int NFAGroupLoopUNode::matchLazy(const bkstring & str, WCMatcher * matcher, const int curInd) const
-{
- int ret = next->match(str, matcher, curInd); // be lazy, just go on
- if (ret < 0)
- {
- int o = matcher->groupIndeces[gi]; // save info for backtracking
- matcher->groupIndeces[gi] = curInd; // advance our position
- ++matcher->groups[gi];
- ret = inner->match(str, matcher, curInd); // match our internal stuff
- if (ret < 0) // if we failed, then restore the info
- {
- --matcher->groups[gi];
- matcher->groupIndeces[gi] = o;
- }
- }
- return ret;
-}
-int NFAGroupLoopUNode::matchPossessive(const bkstring & str, WCMatcher * matcher, const int curInd) const
-{
- int o = matcher->groupIndeces[gi]; // save info for backtracking
- matcher->groupPos[gi] = matcher->groups[gi]; // set a flag stating we have matcher at least this much
- matcher->groupIndeces[gi] = curInd; // move along
- ++matcher->groups[gi];
- int ret = inner->match(str, matcher, curInd); // try and match again
- if (ret < 0)
- { // if we fail, back off, but to an extent
- --matcher->groups[gi];
- matcher->groupIndeces[gi] = o;
- if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd);
- }
- return ret;
-} \ No newline at end of file
+ if (inner) inner->findAllNodes(soFar);
+ NFAUNode::findAllNodes(soFar);
+}
+int NFAGroupLoopUNode::match(const CMString & str, WCMatcher * matcher, const int curInd) const
+{
+ bool b = (curInd > matcher->groupIndeces[gi]);
+
+ if (b && matcher->groups[gi] < min)
+ {
+ ++matcher->groups[gi];
+ int o = matcher->groupIndeces[gi];
+ matcher->groupIndeces[gi] = curInd;
+ int ret = inner->match(str, matcher, curInd);
+ if (ret < 0)
+ {
+ matcher->groupIndeces[gi] = o;
+ --matcher->groups[gi];
+ }
+ return ret;
+ }
+ else if (!b || matcher->groups[gi] >= max)
+ {
+ return next->match(str, matcher, curInd);
+ }
+ else
+ {
+ switch (type)
+ {
+ case 0: return matchGreedy(str, matcher, curInd);
+ case 1: return matchLazy(str, matcher, curInd);
+ case 2: return matchPossessive(str, matcher, curInd);
+ }
+ }
+ return -1;
+}
+int NFAGroupLoopUNode::matchGreedy(const CMString & str, WCMatcher * matcher, const int curInd) const
+{
+ int o = matcher->groupIndeces[gi]; // save our info for backtracking
+ matcher->groupIndeces[gi] = curInd; // move along
+ ++matcher->groups[gi];
+ int ret = inner->match(str, matcher, curInd); // match internally
+ if (ret < 0)
+ { // if we failed, then restore info and match next
+ --matcher->groups[gi];
+ matcher->groupIndeces[gi] = o;
+ ret = next->match(str, matcher, curInd);
+ }
+ return ret;
+}
+int NFAGroupLoopUNode::matchLazy(const CMString & str, WCMatcher * matcher, const int curInd) const
+{
+ int ret = next->match(str, matcher, curInd); // be lazy, just go on
+ if (ret < 0)
+ {
+ int o = matcher->groupIndeces[gi]; // save info for backtracking
+ matcher->groupIndeces[gi] = curInd; // advance our position
+ ++matcher->groups[gi];
+ ret = inner->match(str, matcher, curInd); // match our internal stuff
+ if (ret < 0) // if we failed, then restore the info
+ {
+ --matcher->groups[gi];
+ matcher->groupIndeces[gi] = o;
+ }
+ }
+ return ret;
+}
+int NFAGroupLoopUNode::matchPossessive(const CMString & str, WCMatcher * matcher, const int curInd) const
+{
+ int o = matcher->groupIndeces[gi]; // save info for backtracking
+ matcher->groupPos[gi] = matcher->groups[gi]; // set a flag stating we have matcher at least this much
+ matcher->groupIndeces[gi] = curInd; // move along
+ ++matcher->groups[gi];
+ int ret = inner->match(str, matcher, curInd); // try and match again
+ if (ret < 0)
+ { // if we fail, back off, but to an extent
+ --matcher->groups[gi];
+ matcher->groupIndeces[gi] = o;
+ if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd);
+ }
+ return ret;
+}