/** From the author (Jeff Stuart) " Let me start by saying this file is pretty big. If you feel up to it, you can try making changes yourself, but you would be better off to just email me at stuart@cs.ucdavis.edu if you think there is a bug, or have something useful you would like added. This project is very "near and dear" to me, so I am fairly quick to make bug fixes. The header files for WCPattern and WCMatcher are fairly well documented and the function names are pretty self-explanatory, but if you are having any trouble, feel free to email me at stuart@cs.ucdavis.edu. If you email me, make sure you put something like C++RE in the subject because I tend to delete email if I don't recognize the name and the subject is something like "I Need Your Help" or "Got A Second" or "I Found It". " */ /* Detailed documentation is provided in this class' header file @author Jeffery Stuart @since November 2004 @version 1.07.00 */ #include "..\stdafx.h" std::map WCPattern::compiledWCPatterns; std::map > WCPattern::registeredWCPatterns; const int WCPattern::MIN_QMATCH = 0x00000000; const int WCPattern::MAX_QMATCH = 0x7FFFFFFF; const unsigned long WCPattern::CASE_INSENSITIVE = 0x01; const unsigned long WCPattern::LITERAL = 0x02; const unsigned long WCPattern::DOT_MATCHES_ALL = 0x04; const unsigned long WCPattern::MULTILINE_MATCHING = 0x08; const unsigned long WCPattern::UNIX_LINE_MODE = 0x10; #define to_lower(a) (wchar_t)(UINT_PTR)CharLowerW((LPWSTR)(unsigned)a) #define is_alpha IsCharAlphaW #if defined(_WIN32) #define str_icmp mir_wstrcmpi #elif defined(__CYGWIN__) || defined(__APPLE__) #include static inline int str_icmp(const wchar_t *a, const wchar_t *b) { while (*a && *b) { const int t = (int)towlower(*a) - (int)tolower(*b); if (t) return t; ++a; ++b; } if (*a) { if (*b) return (int)towlower(*a) - (int)tolower(*b); return 1; } else if (*b) return 1; return 0; } #else #define str_icmp wcscasecmp #endif WCPattern::WCPattern(const CMString &rhs) { matcher = NULL; pattern = rhs; curInd = 0; groupCount = 0; nonCapGroupCount = 0; error = 0; head = NULL; } // convenient function in case we want to add any extra debugging output void WCPattern::raiseError() { error = 1; } NFAUNode *WCPattern::registerNode(NFAUNode *node) { nodes[node] = 1; return node; } CMString WCPattern::classUnion(CMString s1, CMString s2) const { wchar_t *out = new wchar_t[66000]; std::sort((LPTSTR)s1.GetString(), (LPTSTR)s1.GetTail()); std::sort((LPTSTR)s2.GetString(), (LPTSTR)s2.GetTail()); wchar_t *p = std::set_union(s1.GetString(), s1.GetTail(), s2.GetString(), s2.GetTail(), out); *p = 0; CMString ret = out; delete[] out; return ret; } CMString WCPattern::classIntersect(CMString s1, CMString s2) const { wchar_t *out = new wchar_t[66000]; std::sort((LPTSTR)s1.GetString(), (LPTSTR)s1.GetTail()); std::sort((LPTSTR)s2.GetString(), (LPTSTR)s2.GetTail()); *std::set_intersection(s1.GetString(), s1.GetTail(), s2.GetString(), s2.GetTail(), out) = 0; CMString ret = out; delete[] out; return ret; } CMString WCPattern::classNegate(CMString s1) const { wchar_t *out = new wchar_t[66000]; int i, ind = 0; std::map m; for (i = 0; i < s1.GetLength(); ++i) m[s1[i]] = 1; for (i = 0xFF; i >= 0; --i) if (m.find((wchar_t)i) == m.end()) out[ind++] = (wchar_t)i; out[ind] = 0; CMString ret(out, ind); delete[] out; return ret; } CMString WCPattern::classCreateRange(wchar_t low, wchar_t hi) const { wchar_t out[300]; int ind = 0; while (low != hi) out[ind++] = low++; out[ind++] = low; return CMString(out, ind); } int WCPattern::getInt(int start, int end) { int ret = 0; for (; start <= end; ++start) ret = ret * 10 + (pattern[start] - '0'); return ret; } bool WCPattern::quantifyCurly(int &sNum, int &eNum) { bool good = 1; int i, ci = curInd + 1; int commaInd = ci, endInd = ci, len = pattern.GetLength(); sNum = eNum = 0; while (endInd < len && pattern[endInd] != '}') ++endInd; while (commaInd < endInd && pattern[commaInd] != ',') ++commaInd; if (endInd >= len) { raiseError(); return 0; } for (i = ci; good && i < endInd; ++i) if (i != commaInd && !isdigit(pattern[i])) good = 0; if (!good && commaInd < endInd) { raiseError(); return 0; } if (!good) return 0; /* so now everything in here is either a comma (and there is at most one comma) or a digit */ if (commaInd == ci) // {,*} { if (endInd == commaInd + 1) { sNum = MIN_QMATCH; eNum = MAX_QMATCH; } // {,} = * else { sNum = MIN_QMATCH; eNum = getInt(commaInd + 1, endInd - 1); } // {,+} } else if (commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH; } // {+,} else if (commaInd == endInd) { sNum = getInt(ci, endInd - 1); eNum = sNum; } // {+} else { sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1); } // {+,+} curInd = endInd + 1; return 1; } NFAUNode* WCPattern::quantifyGroup(NFAUNode *start, NFAUNode *stop, const int gn) { NFAUNode *newNode = NULL; int type = 0; if (curInd < pattern.GetLength()) { wchar_t ch = (curInd + 1 >= pattern.GetLength()) ? USHRT_MAX : pattern[curInd + 1]; switch (pattern[curInd]) { case '*': ++curInd; switch (ch) { case '?': ++curInd; type = 1; break; case '+': ++curInd; type = 2; break; } newNode = registerNode(new NFAGroupLoopPrologueUNode(gn)); newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, MAX_QMATCH, gn, type)); stop->next = newNode->next; return newNode; case '?': ++curInd; switch (ch) { case '?': ++curInd; type = 1; break; case '+': ++curInd; type = 2; break; } newNode = registerNode(new NFAGroupLoopPrologueUNode(gn)); newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, 1, gn, type)); stop->next = newNode->next; return newNode; case '+': ++curInd; switch (ch) { case '?': ++curInd; type = 1; break; case '+': ++curInd; type = 2; break; } newNode = registerNode(new NFAGroupLoopPrologueUNode(gn)); newNode->next = registerNode(new NFAGroupLoopUNode(start, 1, MAX_QMATCH, gn, type)); stop->next = newNode->next; return newNode; case '{': { int s, e; if (quantifyCurly(s, e)) { ch = (curInd < pattern.GetLength()) ? pattern[curInd] : USHRT_MAX; switch (ch) { case '?': ++curInd; type = 1; break; case '+': ++curInd; type = 2; break; } newNode = registerNode(new NFAGroupLoopPrologueUNode(gn)); newNode->next = registerNode(new NFAGroupLoopUNode(start, s, e, gn, type)); stop->next = newNode->next; return newNode; } } } } return NULL; } NFAUNode* WCPattern::quantify(NFAUNode *newNode) { if (curInd < pattern.GetLength()) { wchar_t ch = (curInd + 1 >= pattern.GetLength()) ? USHRT_MAX : pattern[curInd + 1]; switch (pattern[curInd]) { case '*': ++curInd; switch (ch) { case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break; case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break; default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break; } break; case '?': ++curInd; switch (ch) { case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break; case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break; default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break; } break; case '+': ++curInd; switch (ch) { case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break; case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break; default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break; } break; case '{': int s, e; if (quantifyCurly(s, e)) { ch = (curInd < pattern.GetLength()) ? pattern[curInd] : USHRT_MAX; switch (ch) { case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, s, e)); break; case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, s, e)); break; default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, s, e)); break; } } break; } } return newNode; } CMString WCPattern::parseClass() { CMString t, ret; wchar_t ch, c1, c2; bool inv = 0, neg = 0, quo = 0; if (curInd < pattern.GetLength() && pattern[curInd] == '^') { ++curInd; neg = 1; } while (curInd < pattern.GetLength() && pattern[curInd] != ']') { ch = pattern[curInd++]; if (ch == '[') { t = parseClass(); ret = classUnion(ret, t); } else if (ch == '&' && curInd < pattern.GetLength() && pattern[curInd] == '&') { if (pattern[++curInd] != '[') { raiseError(); curInd = pattern.GetLength(); } else { ++curInd; t = parseClass(); ret = classIntersect(ret, t); } } else if (ch == '\\') { t = parseEscape(inv, quo); if (quo) { raiseError(); curInd = pattern.GetLength(); } else if (inv || t.GetLength() > 1) { // cant be part of a range (a-z) if (inv) t = classNegate(t); ret = classUnion(ret, t); } else if (curInd < pattern.GetLength() && pattern[curInd] == '-') { // part of a range (a-z) c1 = t[0]; ++curInd; if (curInd >= pattern.GetLength()) raiseError(); else { c2 = pattern[curInd++]; if (c2 == '\\') { t = parseEscape(inv, quo); if (quo) { raiseError(); curInd = pattern.GetLength(); } else if (inv || t.GetLength() > 1) raiseError(); else ret = classUnion(ret, classCreateRange(c1, c2)); } else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') { raiseError(); curInd = pattern.GetLength(); } else ret = classUnion(ret, classCreateRange(c1, c2)); } } else ret = classUnion(ret, t); } else if (curInd < pattern.GetLength() && pattern[curInd] == '-') { c1 = ch; ++curInd; if (curInd >= pattern.GetLength()) raiseError(); else { c2 = pattern[curInd++]; if (c2 == '\\') { t = parseEscape(inv, quo); if (quo) { raiseError(); curInd = pattern.GetLength(); } else if (inv || t.GetLength() > 1) raiseError(); else ret = classUnion(ret, classCreateRange(c1, c2)); } else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') { raiseError(); curInd = pattern.GetLength(); } else ret = classUnion(ret, classCreateRange(c1, c2)); } } else ret.AppendChar(ch); } if (curInd >= pattern.GetLength() || pattern[curInd] != ']') { raiseError(); ret = L""; } else { ++curInd; if (neg) ret = classNegate(ret); } return ret; } CMString WCPattern::parsePosix() { CMString s7 = pattern.Mid(curInd, 7); if (s7 == L"{Lower}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyz"; } if (s7 == L"{Upper}") { curInd += 7; return L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; } if (s7 == L"{Alpha}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; } if (s7 == L"{Digit}") { curInd += 7; return L"0123456789"; } if (s7 == L"{Alnum}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; } if (s7 == L"{Punct}") { curInd += 7; return L"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } if (s7 == L"{Graph}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } if (s7 == L"{Print}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } if (s7 == L"{Blank}") { curInd += 7; return L" \t"; } if (s7 == L"{Space}") { curInd += 7; return L" \t\n\x0B\f\r"; } if (s7 == L"{Cntrl}") { CMString s(' ', 32 + 1); for (int i = 0; i <= 0x1F; ++i) s.SetAt(i, i); s.SetAt(0x20, 0x7F); curInd += 7; return s; } if (s7 == L"{ASCII}") { CMString s(' ', 0x80); for (int i = 0; i <= 0x7f; ++i) s.SetAt(i, i); curInd += 7; return s; } if (pattern.Mid(curInd, 8) == L"{XDigit}") { curInd += 8; return L"abcdefABCDEF0123456789"; } raiseError(); return L""; } NFAUNode* WCPattern::parseBackref() { #define is_dig(x) ((x) >= '0' && (x) <= '9') #define to_int(x) ((x) - '0') int ci = curInd; int oldRef = 0, ref = 0; while (ci < pattern.GetLength() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount)) { oldRef = ref; ref = ref * 10 + to_int(pattern[ci++]); } if (ci == pattern.GetLength()) { oldRef = ref; ++ci; } if (oldRef < 0 || ci <= curInd) { raiseError(); return registerNode(new NFAReferenceUNode(-1)); } curInd = ci; return registerNode(new NFAReferenceUNode(ref)); #undef is_dig #undef to_int } CMString WCPattern::parseOctal() { #define islowoc(x) ((x) >= '0' && (x) <= '3') #define isoc(x) ((x) >= '0' && (x) <= '7') #define fromoc(x) ((x) - '0') int ci = curInd; wchar_t ch1 = (ci + 0 < pattern.GetLength()) ? pattern[ci + 0] : USHRT_MAX; wchar_t ch2 = (ci + 1 < pattern.GetLength()) ? pattern[ci + 1] : USHRT_MAX; wchar_t ch3 = (ci + 2 < pattern.GetLength()) ? pattern[ci + 2] : USHRT_MAX; CMString s = L" "; if (islowoc(ch1) && isoc(ch2)) { curInd += 2; s.SetAt(0, fromoc(ch1) * 8 + fromoc(ch2)); if (isoc(ch3)) { ++curInd; s.SetAt(0, s[0] * 8 + fromoc(ch3)); } } else if (isoc(ch1) && isoc(ch2)) { curInd += 2; s.SetAt(0, fromoc(ch1) * 8 + fromoc(ch2)); } else raiseError(); return s; #undef islowoc #undef isoc #undef fromoc } CMString WCPattern::parseHex() { #define to_low(x) (((x) >= 'A' && (x) <= 'Z') ? ((x) - 'A' + 'a') : (x)) #define is_dig(x) ((x) >= '0' && (x) <= '9') #define is_hex(x) (is_dig(x) || (to_low(x) >= 'a' && to_low(x) <= 'f')) #define to_int(x) ((is_dig(x)) ? ((x) - '0') : (to_low(x) - 'a' + 10)) int ci = curInd; wchar_t ch1 = (ci + 0 < pattern.GetLength()) ? pattern[ci + 0] : USHRT_MAX; wchar_t ch2 = (ci + 1 < pattern.GetLength()) ? pattern[ci + 1] : USHRT_MAX; wchar_t ch3 = (ci + 2 < pattern.GetLength()) ? pattern[ci + 2] : USHRT_MAX; wchar_t ch4 = (ci + 3 < pattern.GetLength()) ? pattern[ci + 3] : USHRT_MAX; CMString s = L" "; if (is_hex(ch1) && is_hex(ch2) && is_hex(ch3) && is_hex(ch4)) { curInd += 2; s.SetAt(0, (to_int(ch1) << 12 & 0xF000) | (to_int(ch2) << 8 & 0x0F00) | (to_int(ch3) << 4 & 0x0F00) | (to_int(ch4) & 0x000F)); } else if (is_hex(ch1) && is_hex(ch2)) { curInd += 2; s.SetAt(0, (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F)); } return s; #undef to_low #undef is_dig #undef is_hex #undef to_int } CMString WCPattern::parseEscape(bool &inv, bool &quo) { wchar_t ch = pattern[curInd++]; CMString classes; if (curInd > pattern.GetLength()) { raiseError(); return ""; } quo = 0; inv = 0; switch (ch) { case 'p': classes = parsePosix(); break; case 'P': classes = L"!!"; classes += parsePosix(); break; case 'd': classes = L"0123456789"; break; case 'D': classes = L"!!0123456789"; break; case 's': classes = L" \t\r\n\f"; break; case 'S': classes = L"!! \t\r\n\f"; break; case 'w': classes = L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break; case 'W': classes = L"!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break; case '0': classes = parseOctal(); break; case 'x': classes = parseHex(); break; case 'Q': quo = 1; break; case 't': classes = L"\t"; break; case 'r': classes = L"\r"; break; case 'n': classes = L"\n"; break; case 'f': classes = L"\f"; break; case 'a': classes = L"\a"; break; case 'e': classes = L"\r"; break; default: classes.AppendChar(ch); break; } if (classes.Mid(0, 2) == L"!!") { classes = classes.Mid(2); inv = 1; } return classes; } NFAUNode* WCPattern::parseRegisteredWCPattern(NFAUNode **end) { int i, j; CMString s; NFAUNode *ret = NULL; for (i = curInd; i < pattern.GetLength() && pattern[i] != '}'; ++i) {} if (pattern[i] != '}') { raiseError(); return NULL; } if (i == curInd + 1) { raiseError(); return NULL; } // {} if (!((pattern[curInd] >= 'a' && pattern[curInd] <= 'z') || (pattern[curInd] >= 'A' && pattern[curInd] <= 'Z') || (pattern[curInd] == '_'))) { raiseError(); return NULL; } for (j = curInd; !error && j < i; ++j) { if (!((pattern[j] >= 'a' && pattern[j] <= 'z') || (pattern[j] >= 'A' && pattern[j] <= 'Z') || (pattern[j] >= '0' && pattern[j] <= '9') || (pattern[j] == '_'))) { raiseError(); return NULL; } } s = pattern.Mid(curInd, i - curInd); if (registeredWCPatterns.find(s) == registeredWCPatterns.end()) raiseError(); else { unsigned long oflags = flags; CMString op = pattern; int ci = i + 1; pattern = registeredWCPatterns[s].first; curInd = 0; flags = registeredWCPatterns[s].second; --groupCount; ret = parse(0, 0, end); pattern = op; curInd = ci; flags = oflags; } if (error) { *end = ret = NULL; } return ret; } // look behind should interpret everything as a literal (except \\) since the // pattern must have a concrete length NFAUNode* WCPattern::parseBehind(const bool pos, NFAUNode **end) { CMString t; while (curInd < pattern.GetLength() && pattern[curInd] != ')') { wchar_t ch = pattern[curInd++]; if (ch == '\\') { if (curInd + 1 >= pattern.GetLength()) { raiseError(); return *end = registerNode(new NFACharUNode(' ')); } ch = pattern[curInd++]; } t.AppendChar(ch); } if (curInd >= pattern.GetLength() || pattern[curInd] != ')') raiseError(); else ++curInd; return *end = registerNode(new NFALookBehindUNode(t, pos)); } NFAUNode* WCPattern::parseQuote() { bool done = 0; CMString s; while (!done) { if (curInd >= pattern.GetLength()) { raiseError(); done = 1; } else if (pattern.Mid(curInd, 2) == L"\\E") { curInd += 2; done = 1; } else if (pattern[curInd] == '\\') { s.AppendChar(pattern[++curInd]); ++curInd; } else s.AppendChar(pattern[curInd++]); } if ((flags & WCPattern::CASE_INSENSITIVE) != 0) return registerNode(new NFACIQuoteUNode(s)); return registerNode(new NFAQuoteUNode(s)); } NFAUNode* WCPattern::parse(const bool inParen, const bool inOr, NFAUNode **end) { NFAUNode *start, *cur, *next = NULL; CMString t; int grc = groupCount++; bool inv, quo; bool ahead = 0, pos = 0, noncap = 0, indep = 0; unsigned long oldFlags = flags; if (inParen) { if (pattern[curInd] == '?') { ++curInd; --groupCount; if (pattern[curInd] == ':') { noncap = 1; ++curInd; grc = --nonCapGroupCount; } else if (pattern[curInd] == '=') { ++curInd; ahead = 1; pos = 1; } else if (pattern[curInd] == '!') { ++curInd; ahead = 1; pos = 0; } else if (pattern.Mid(curInd, 2) == L"<=") { curInd += 2; return parseBehind(1, end); } else if (pattern.Mid(curInd, 2) == L"') { ++curInd; indep = 1; } else { bool negate = false, done = false; while (!done) { if (curInd >= pattern.GetLength()) { raiseError(); return NULL; } else if (negate) { switch (pattern[curInd]) { case 'i': flags &= ~WCPattern::CASE_INSENSITIVE; break; case 'd': flags &= ~WCPattern::UNIX_LINE_MODE; break; case 'm': flags &= ~WCPattern::MULTILINE_MATCHING; break; case 's': flags &= ~WCPattern::DOT_MATCHES_ALL; break; case ':': done = true; break; case ')': ++curInd; *end = registerNode(new NFALookBehindUNode(L"", true)); return *end; case '-': default: raiseError(); return NULL; } } else { switch (pattern[curInd]) { case 'i': flags |= WCPattern::CASE_INSENSITIVE; break; case 'd': flags |= WCPattern::UNIX_LINE_MODE; break; case 'm': flags |= WCPattern::MULTILINE_MATCHING; break; case 's': flags |= WCPattern::DOT_MATCHES_ALL; break; case ':': done = true; break; case '-': negate = true; break; case ')': ++curInd; *end = registerNode(new NFALookBehindUNode(L"", true)); return *end; default: raiseError(); return NULL; } } ++curInd; } noncap = 1; grc = --nonCapGroupCount; } if (noncap) cur = start = registerNode(new NFAGroupHeadUNode(grc)); else cur = start = registerNode(new NFASubStartUNode); } else cur = start = registerNode(new NFAGroupHeadUNode(grc)); } else cur = start = registerNode(new NFASubStartUNode); while (curInd < pattern.GetLength()) { wchar_t ch = pattern[curInd++]; next = NULL; if (error) return NULL; switch (ch) { case '^': if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineUNode); else next = registerNode(new NFAStartOfInputUNode); break; case '$': if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineUNode); else next = registerNode(new NFAEndOfInputUNode(0)); break; case '|': --groupCount; cur->next = registerNode(new NFAAcceptUNode); cur = start = registerNode(new NFAOrUNode(start, parse(inParen, 1))); break; case '\\': if (curInd < pattern.GetLength()) { bool eoi = 0; switch (pattern[curInd]) { case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': next = parseBackref(); break; case 'A': ++curInd; next = registerNode(new NFAStartOfInputUNode); break; case 'B': ++curInd; next = registerNode(new NFAWordBoundaryUNode(0)); break; case 'b': ++curInd; next = registerNode(new NFAWordBoundaryUNode(1)); break; case 'G': ++curInd; next = registerNode(new NFAEndOfMatchUNode); break; case 'Z': eoi = 1; case 'z': ++curInd; next = registerNode(new NFAEndOfInputUNode(eoi)); break; default: t = parseEscape(inv, quo); //printf("inv quo classes { %c %c %s }\n", inv ? 't' : 'f', quo ? 't' : 'f', t.c_str()); if (!quo) { if (t.GetLength() > 1 || inv) { if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassUNode(t, inv)); else next = registerNode(new NFAClassUNode(t, inv)); } else next = registerNode(new NFACharUNode(t[0])); } else next = parseQuote(); } } else raiseError(); break; case '[': if ((flags & WCPattern::CASE_INSENSITIVE) == 0) { NFAClassUNode *clazz = new NFAClassUNode(); CMString s = parseClass(); for (int i = 0; i < (int)s.GetLength(); ++i) clazz->vals[s[i]] = 1; next = registerNode(clazz); } else { NFACIClassUNode *clazz = new NFACIClassUNode(); CMString s = parseClass(); for (int i = 0; i < s.GetLength(); ++i) clazz->vals[to_lower(s[i])] = 1; next = registerNode(clazz); } break; case '.': { bool useN = 1, useR = 1; NFAClassUNode *clazz = new NFAClassUNode(1); if ((flags & WCPattern::UNIX_LINE_MODE) != 0) useR = 0; if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0; if (useN) clazz->vals['\n'] = 1; if (useR) clazz->vals['\r'] = 1; next = registerNode(clazz); } break; case '(': { NFAUNode *pEnd, *t1, *t2; t1 = parse(1, 0, &pEnd); if (!t1) raiseError(); else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, pEnd, grc)) != NULL) { cur->next = t2; cur = t2->next; } else { cur->next = t1; cur = pEnd; } } break; case ')': if (!inParen) raiseError(); else if (inOr) { --curInd; cur = cur->next = registerNode(new NFAAcceptUNode); flags = oldFlags; return start; } else { if (ahead) { cur = cur->next = registerNode(new NFAAcceptUNode); flags = oldFlags; return *end = registerNode(new NFALookAheadUNode(start, pos)); } else if (indep) { cur = cur->next = registerNode(new NFAAcceptUNode); flags = oldFlags; return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1)); } else { // capping or noncapping, it doesnt matter *end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc)); next = quantifyGroup(start, *end, grc); if (next) { start = next; *end = next->next; } flags = oldFlags; return start; } } break; case '{': // registered pattern cur->next = parseRegisteredWCPattern(&next); if (cur->next) cur = next; break; case '*': case '+': case '?': // case '}': // case ']': raiseError(); break; default: if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch)); else next = registerNode(new NFACharUNode(ch)); break; } if (next) cur = cur->next = quantify(next); } if (inParen) raiseError(); else { if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode); if (end) *end = cur; } flags = oldFlags; if (error) return NULL; return start; } WCPattern* WCPattern::compile(const CMString &pattern, const unsigned long mode) { WCPattern *p = new WCPattern(pattern); NFAUNode *end; p->flags = mode; if ((mode & WCPattern::LITERAL) != 0) { p->head = p->registerNode(new NFAStartUNode); if ((mode & WCPattern::CASE_INSENSITIVE) != 0) p->head->next = p->registerNode(new NFACIQuoteUNode(pattern)); else p->head->next = p->registerNode(new NFAQuoteUNode(pattern)); p->head->next->next = p->registerNode(new NFAEndUNode); } else { p->head = p->parse(0, 0, &end); if (!p->head) { delete p; p = NULL; } else { if (!(p->head && p->head->isStartOfInputNode())) { NFAUNode *n = p->registerNode(new NFAStartUNode); n->next = p->head; p->head = n; } end->next = p->registerNode(new NFAEndUNode); } } if (p != NULL) p->matcher = new WCMatcher(p, L""); return p; } WCPattern* WCPattern::compileAndKeep(const CMString &pattern, const unsigned long mode) { WCPattern *ret = NULL; std::map::iterator it = compiledWCPatterns.find(pattern); if (it != compiledWCPatterns.end()) ret = it->second; else { ret = compile(pattern, mode); compiledWCPatterns[pattern] = ret; } return ret; } CMString WCPattern::replace(const CMString &pattern, const CMString &str, const CMString &replacementText, const unsigned long mode) { CMString ret; WCPattern *p = WCPattern::compile(pattern, mode); if (p) { ret = p->replace(str, replacementText); delete p; } return ret; } std::vector WCPattern::split(const CMString &pattern, const CMString &str, const bool keepEmptys, const unsigned long limit, const unsigned long mode) { std::vector ret; WCPattern *p = WCPattern::compile(pattern, mode); if (p) { ret = p->split(str, keepEmptys, limit); delete p; } return ret; } std::vector WCPattern::findAll(const CMString &pattern, const CMString &str, const unsigned long mode) { std::vector ret; WCPattern *p = WCPattern::compile(pattern, mode); if (p) { ret = p->findAll(str); delete p; } return ret; } bool WCPattern::matches(const CMString &pattern, const CMString &str, const unsigned long mode) { bool ret = 0; WCPattern *p = compile(pattern, mode); if (p) { ret = p->matches(str); delete p; } return ret; } bool WCPattern::registerWCPattern(const CMString &name, const CMString &pattern, const unsigned long mode) { WCPattern *p = WCPattern::compile(pattern, mode); if (!p) return 0; WCPattern::registeredWCPatterns[name] = std::make_pair(pattern, mode); delete p; return 1; } void WCPattern::unregisterWCPatterns() { registeredWCPatterns.clear(); } void WCPattern::clearWCPatternCache() { std::map::iterator it; for (it = compiledWCPatterns.begin(); it != compiledWCPatterns.end(); ++it) delete it->second; compiledWCPatterns.clear(); } std::pair WCPattern::findNthMatch(const CMString &pattern, const CMString &str, const int matchNum, const unsigned long mode) { std::pair ret; WCPattern *p = WCPattern::compile(pattern, mode); ret.second = -1; if (p) { int i = -1; p->matcher->setString(str); while (i < matchNum && p->matcher->findNextMatch()) { ++i; } if (i == matchNum && p->matcher->getStartingIndex() >= 0) { ret.first = p->matcher->getGroup(0); ret.second = p->matcher->getStartingIndex(); } delete p; } return ret; } WCPattern::~WCPattern() { if (matcher) delete matcher; for (std::map::iterator it = nodes.begin(); it != nodes.end(); ++it) delete it->first; } CMString WCPattern::replace(const CMString &str, const CMString &replacementText) { int li = 0; CMString ret; matcher->setString(str); while (matcher->findNextMatch()) { ret += str.Mid(li, matcher->getStartingIndex() - li); ret += matcher->replaceWithGroups(replacementText); li = matcher->getEndingIndex(); } ret += str.Mid(li); return ret; } std::vector WCPattern::split(const CMString &str, const bool keepEmptys, const unsigned long limit) { unsigned long lim = (limit == 0 ? MAX_QMATCH : limit); int li = 0; std::vector ret; matcher->setString(str); while (matcher->findNextMatch() && ret.size() < lim) { if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back(L""); if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys) { if (li != matcher->getStartingIndex() || keepEmptys) ret.push_back(str.Mid(li, matcher->getStartingIndex() - li)); li = matcher->getEndingIndex(); } } if (li < str.GetLength()) ret.push_back(str.Mid(li)); return ret; } std::vector WCPattern::findAll(const CMString &str) { matcher->setString(str); return matcher->findAll(); } bool WCPattern::matches(const CMString &str) { matcher->setString(str); return matcher->matches(); } unsigned long WCPattern::getFlags() const { return flags; } CMString WCPattern::getWCPattern() const { return pattern; } WCMatcher *WCPattern::createWCMatcher(const CMString &str) { return new WCMatcher(this, str); } // NFAUNode NFAUNode::NFAUNode() { next = NULL; } NFAUNode::~NFAUNode() {} void NFAUNode::findAllNodes(std::map &soFar) { if (soFar.find(this) == soFar.end()) return; soFar[this] = 1; if (next) next->findAllNodes(soFar); } // NFACharUNode NFACharUNode::NFACharUNode(const wchar_t c) { ch = c; } int NFACharUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd < str.GetLength() && str[curInd] == ch) return next->match(str, matcher, curInd + 1); return -1; } // NFACICharUNode NFACICharUNode::NFACICharUNode(const wchar_t c) { ch = to_lower(c); } int NFACICharUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd < str.GetLength() && to_lower(str[curInd]) == ch) return next->match(str, matcher, curInd + 1); return -1; } // NFAStartUNode NFAStartUNode::NFAStartUNode() {} int NFAStartUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int ret = -1, ci = curInd; matcher->starts[0] = curInd; if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) == (unsigned int)WCMatcher::MATCH_ENTIRE_STRING) { if (curInd != 0) { matcher->starts[0] = -1; return -1; } return next->match(str, matcher, 0); } while ((ret = next->match(str, matcher, ci)) == -1 && ci < str.GetLength()) { matcher->clearGroups(); matcher->starts[0] = ++ci; } if (ret < 0) matcher->starts[0] = -1; return ret; } // NFAEndUNode NFAEndUNode::NFAEndUNode() {} int NFAEndUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { matcher->ends[0] = curInd; if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) != 0) { if (curInd == str.GetLength()) return curInd; matcher->ends[0] = -1; return -1; } return curInd; } // NFAQuantifierUNode void NFAQuantifierUNode::findAllNodes(std::map &soFar) { inner->findAllNodes(soFar); NFAUNode::findAllNodes(soFar); } NFAQuantifierUNode::NFAQuantifierUNode(WCPattern *pat, NFAUNode *internal, const int minMatch, const int maxMatch) { inner = internal; inner->next = pat->registerNode(new NFAAcceptUNode); min = (minMatch < WCPattern::MIN_QMATCH) ? WCPattern::MIN_QMATCH : minMatch; max = (maxMatch > WCPattern::MAX_QMATCH) ? WCPattern::MAX_QMATCH : maxMatch; } int NFAQuantifierUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int i0, i1, i2 = 0; i0 = i1 = curInd; while (i2 < min) { ++i2; i1 = inner->match(str, matcher, i0); if (i1 <= i0) return i1; // i1 < i0 means i1 is -1 i0 = i1; } return i1; } // NFAGreedyQuantifierUNode NFAGreedyQuantifierUNode::NFAGreedyQuantifierUNode(WCPattern *pat, NFAUNode *internal, const int minMatch, const int maxMatch) : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { } int NFAGreedyQuantifierUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int t = NFAQuantifierUNode::match(str, matcher, curInd); if (t != -1) return matchInternal(str, matcher, t, min); return t; } int NFAGreedyQuantifierUNode::matchInternal(const CMString &str, WCMatcher *matcher, const int curInd, const int soFar) const { if (soFar >= max) return next->match(str, matcher, curInd); int i = inner->match(str, matcher, curInd); if (i != -1) { int j = matchInternal(str, matcher, i, soFar + 1); if (j != -1) return j; } return next->match(str, matcher, curInd); } // NFALazyQuantifierUNode NFALazyQuantifierUNode::NFALazyQuantifierUNode(WCPattern *pat, NFAUNode *internal, const int minMatch, const int maxMatch) : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { } int NFALazyQuantifierUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int m = NFAQuantifierUNode::match(str, matcher, curInd); if (m == -1) return -1; for (int i = min; i < max; ++i) { int j = next->match(str, matcher, m); if (j == -1) { j = inner->match(str, matcher, m); // if j < m, then j is -1, so we bail. // if j == m, then we would just go and call next->match on the same index, // but it already failed trying to match right there, so we know we can // just bail if (j <= m) return -1; m = j; } else return j; } return next->match(str, matcher, m); } // NFAPossessiveQuantifierUNode NFAPossessiveQuantifierUNode::NFAPossessiveQuantifierUNode(WCPattern *pat, NFAUNode *internal, const int minMatch, const int maxMatch) : NFAQuantifierUNode(pat, internal, minMatch, maxMatch) { } int NFAPossessiveQuantifierUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int m = NFAQuantifierUNode::match(str, matcher, curInd); if (m == -1) return -1; for (int i = min; i < max; ++i) { int j = inner->match(str, matcher, m); if (j <= m) return next->match(str, matcher, m); m = j; } return next->match(str, matcher, m); } // NFAAcceptUNode NFAAcceptUNode::NFAAcceptUNode() { } int NFAAcceptUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (!next) return curInd; else return next->match(str, matcher, curInd); } // NFAClassUNode NFAClassUNode::NFAClassUNode(const bool invert) { inv = invert; } NFAClassUNode::NFAClassUNode(const CMString &clazz, const bool invert) { inv = invert; for (int i = 0; i < clazz.GetLength(); ++i) vals[clazz[i]] = 1; } int NFAClassUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd < str.GetLength() && ((vals.find(str[curInd]) != vals.end()) ^ inv)) { return next->match(str, matcher, curInd + 1); } return -1; } // NFACIClassUNode NFACIClassUNode::NFACIClassUNode(const bool invert) { inv = invert; } NFACIClassUNode::NFACIClassUNode(const CMString &clazz, const bool invert) { inv = invert; for (int i = 0; i < (int)clazz.GetLength(); ++i) vals[to_lower(clazz[i])] = 1; } int NFACIClassUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd < str.GetLength() && ((vals.find(to_lower(str[curInd])) != vals.end()) ^ inv)) return next->match(str, matcher, curInd + 1); return -1; } // NFASubStartUNode NFASubStartUNode::NFASubStartUNode() { } int NFASubStartUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { return next->match(str, matcher, curInd); } // NFAOrUNode NFAOrUNode::NFAOrUNode(NFAUNode *first, NFAUNode *second) : one(first), two(second) { } void NFAOrUNode::findAllNodes(std::map &soFar) { if (one) one->findAllNodes(soFar); if (two) two->findAllNodes(soFar); NFAUNode::findAllNodes(soFar); } int NFAOrUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int ci = one->match(str, matcher, curInd); if (ci != -1) ci = next->match(str, matcher, ci); if (ci != -1) return ci; if (ci == -1) ci = two->match(str, matcher, curInd); if (ci != -1) ci = next->match(str, matcher, ci); return ci; } // NFAQuoteUNode NFAQuoteUNode::NFAQuoteUNode(const CMString "ed) : qStr(quoted) { } int NFAQuoteUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd + qStr.GetLength() > str.GetLength()) return -1; if (str.Mid(curInd, qStr.GetLength()) != qStr) return -1; return next->match(str, matcher, curInd + qStr.GetLength()); } // NFACIQuoteUNode NFACIQuoteUNode::NFACIQuoteUNode(const CMString "ed) : qStr(quoted) { } int NFACIQuoteUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd + qStr.GetLength() > str.GetLength()) return -1; if (str_icmp(str.Mid(curInd, qStr.GetLength()).c_str(), qStr.c_str())) return -1; return next->match(str, matcher, qStr.GetLength()); } // NFALookAheadUNode NFALookAheadUNode::NFALookAheadUNode(NFAUNode *internal, const bool positive) : NFAUNode(), pos(positive), inner(internal) { } void NFALookAheadUNode::findAllNodes(std::map &soFar) { if (inner) inner->findAllNodes(soFar); NFAUNode::findAllNodes(soFar); } int NFALookAheadUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { return ((inner->match(str, matcher, curInd) == -1) ^ pos) ? next->match(str, matcher, curInd) : -1; } // NFALookBehindUNode NFALookBehindUNode::NFALookBehindUNode(const CMString &str, const bool positive) : pos(positive), mStr(str) { } int NFALookBehindUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (pos) { if (curInd < mStr.GetLength()) return -1; if (str.Mid(curInd - mStr.GetLength(), mStr.GetLength()) == mStr) return next->match(str, matcher, curInd); } else { if (curInd < mStr.GetLength()) return next->match(str, matcher, curInd); if (str.Mid(curInd - mStr.GetLength(), mStr.GetLength()) == mStr) return -1; return next->match(str, matcher, curInd); } return -1; } // NFAStartOfLineUNode NFAStartOfLineUNode::NFAStartOfLineUNode() { } int NFAStartOfLineUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd == 0 || str[curInd - 1] == '\n' || str[curInd - 1] == '\r') return next->match(str, matcher, curInd); return -1; } // NFAEndOfLineUNode NFAEndOfLineUNode::NFAEndOfLineUNode() { } int NFAEndOfLineUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd >= str.GetLength() || str[curInd] == '\n' || str[curInd] == '\r') return next->match(str, matcher, curInd); return -1; } // NFAReferenceUNode NFAReferenceUNode::NFAReferenceUNode(const int groupIndex) : gi(groupIndex) { } int NFAReferenceUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int len = matcher->ends[gi] - matcher->starts[gi]; int ni = -1; if (gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0) ni = curInd; else if (curInd + len > (int)str.GetLength()) return -1; else if (str.Mid(curInd, len) != str.Mid(matcher->starts[gi], len)) return -1; else ni = curInd + len; return next->match(str, matcher, ni); } // NFAStartOfInputUNode NFAStartOfInputUNode::NFAStartOfInputUNode() { } int NFAStartOfInputUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd == 0) return next->match(str, matcher, curInd); return -1; } // NFAEndOfInputUNode NFAEndOfInputUNode::NFAEndOfInputUNode(const bool lookForTerm) : term(lookForTerm) { } int NFAEndOfInputUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int len = str.GetLength(); if (curInd == len) return next->match(str, matcher, curInd); else if (term) { if (curInd == len - 1 && (str[curInd] == '\r' || str[curInd] == '\n')) { return next->match(str, matcher, curInd); } else if (curInd == len - 2 && str.Mid(curInd, 2) == L"\r\n") { return next->match(str, matcher, curInd); } } return -1; } // NFAWordBoundaryUNode NFAWordBoundaryUNode::NFAWordBoundaryUNode(const bool positive) : pos(positive) { } int NFAWordBoundaryUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int len = str.GetLength(); wchar_t c1 = (curInd - 1 < len && curInd > 0) ? str[curInd - 1] : '\n'; wchar_t c2 = (curInd < len) ? str[curInd] : '\n'; if (curInd == len) return next->match(str, matcher, curInd); bool ok = is_alpha(c1) != is_alpha(c2); if (ok && pos) return next->match(str, matcher, curInd); return -1; } // NFAEndOfMatchUNode NFAEndOfMatchUNode::NFAEndOfMatchUNode() { } int NFAEndOfMatchUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { if (curInd == matcher->lm) return next->match(str, matcher, curInd); return -1; } // NFAGroupHeadUNode NFAGroupHeadUNode::NFAGroupHeadUNode(const int groupIndex) : gi(groupIndex) { } int NFAGroupHeadUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int ret, o = matcher->starts[gi]; matcher->starts[gi] = curInd; ret = next->match(str, matcher, curInd); if (ret < 0) matcher->starts[gi] = o; return ret; } // NFAGroupTailUNode NFAGroupTailUNode::NFAGroupTailUNode(const int groupIndex) : gi(groupIndex) { } int NFAGroupTailUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int ret, o = matcher->ends[gi]; matcher->ends[gi] = curInd; ret = next->match(str, matcher, curInd); if (ret < 0) matcher->ends[gi] = o; return ret; } // NFAGroupLoopPrologueUNode NFAGroupLoopPrologueUNode::NFAGroupLoopPrologueUNode(const int groupIndex) : gi(groupIndex) { } int NFAGroupLoopPrologueUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { int ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi]; matcher->groups[gi] = 0; matcher->groupPos[gi] = 0; matcher->groupIndeces[gi] = -1; ret = next->match(str, matcher, curInd); if (ret < 0) { matcher->groups[gi] = o1; matcher->groupPos[gi] = o2; matcher->groupIndeces[gi] = o3; } return ret; } // NFAGroupLoopUNode NFAGroupLoopUNode::NFAGroupLoopUNode(NFAUNode *internal, const int minMatch, const int maxMatch, const int groupIndex, const int matchType) { inner = internal; min = minMatch; max = maxMatch; gi = groupIndex; type = matchType; } void NFAGroupLoopUNode::findAllNodes(std::map &soFar) { if (inner) inner->findAllNodes(soFar); NFAUNode::findAllNodes(soFar); } int NFAGroupLoopUNode::match(const CMString &str, WCMatcher *matcher, const int curInd) const { bool b = (curInd > matcher->groupIndeces[gi]); if (b && matcher->groups[gi] < min) { ++matcher->groups[gi]; int o = matcher->groupIndeces[gi]; matcher->groupIndeces[gi] = curInd; int ret = inner->match(str, matcher, curInd); if (ret < 0) { matcher->groupIndeces[gi] = o; --matcher->groups[gi]; } return ret; } else if (!b || matcher->groups[gi] >= max) { return next->match(str, matcher, curInd); } else { switch (type) { case 0: return matchGreedy(str, matcher, curInd); case 1: return matchLazy(str, matcher, curInd); case 2: return matchPossessive(str, matcher, curInd); } } return -1; } int NFAGroupLoopUNode::matchGreedy(const CMString &str, WCMatcher *matcher, const int curInd) const { int o = matcher->groupIndeces[gi]; // save our info for backtracking matcher->groupIndeces[gi] = curInd; // move along ++matcher->groups[gi]; int ret = inner->match(str, matcher, curInd); // match internally if (ret < 0) { // if we failed, then restore info and match next --matcher->groups[gi]; matcher->groupIndeces[gi] = o; ret = next->match(str, matcher, curInd); } return ret; } int NFAGroupLoopUNode::matchLazy(const CMString &str, WCMatcher *matcher, const int curInd) const { int ret = next->match(str, matcher, curInd); // be lazy, just go on if (ret < 0) { int o = matcher->groupIndeces[gi]; // save info for backtracking matcher->groupIndeces[gi] = curInd; // advance our position ++matcher->groups[gi]; ret = inner->match(str, matcher, curInd); // match our internal stuff if (ret < 0) // if we failed, then restore the info { --matcher->groups[gi]; matcher->groupIndeces[gi] = o; } } return ret; } int NFAGroupLoopUNode::matchPossessive(const CMString &str, WCMatcher *matcher, const int curInd) const { int o = matcher->groupIndeces[gi]; // save info for backtracking matcher->groupPos[gi] = matcher->groups[gi]; // set a flag stating we have matcher at least this much matcher->groupIndeces[gi] = curInd; // move along ++matcher->groups[gi]; int ret = inner->match(str, matcher, curInd); // try and match again if (ret < 0) { // if we fail, back off, but to an extent --matcher->groups[gi]; matcher->groupIndeces[gi] = o; if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd); } return ret; }