From b65ebafbb15e8e69cfa5aba221b6fd9fab1f41c7 Mon Sep 17 00:00:00 2001 From: George Hazan Date: Wed, 29 Nov 2017 18:41:39 +0300 Subject: SmaileyAdd to use pcre16 instead of the obsoleted regexp parser --- plugins/SmileyAdd/SmileyAdd.vcxproj | 10 +- plugins/SmileyAdd/src/download.cpp | 11 +- plugins/SmileyAdd/src/regexp/WCMatcher.cpp | 181 --- plugins/SmileyAdd/src/regexp/WCMatcher.h | 230 ---- plugins/SmileyAdd/src/regexp/WCPattern.cpp | 1668 ---------------------------- plugins/SmileyAdd/src/regexp/WCPattern.h | 1654 --------------------------- plugins/SmileyAdd/src/smileys.cpp | 523 +++------ plugins/SmileyAdd/src/smileys.h | 7 +- plugins/SmileyAdd/src/stdafx.h | 7 +- 9 files changed, 158 insertions(+), 4133 deletions(-) delete mode 100644 plugins/SmileyAdd/src/regexp/WCMatcher.cpp delete mode 100644 plugins/SmileyAdd/src/regexp/WCMatcher.h delete mode 100644 plugins/SmileyAdd/src/regexp/WCPattern.cpp delete mode 100644 plugins/SmileyAdd/src/regexp/WCPattern.h (limited to 'plugins') diff --git a/plugins/SmileyAdd/SmileyAdd.vcxproj b/plugins/SmileyAdd/SmileyAdd.vcxproj index 51e6354a80..68cab8ea78 100644 --- a/plugins/SmileyAdd/SmileyAdd.vcxproj +++ b/plugins/SmileyAdd/SmileyAdd.vcxproj @@ -18,6 +18,11 @@ x64 + + + {6124e997-426e-4a0b-9617-d6d577d5e7d7} + + SmileyAdd {5BBA7E51-B494-4462-BC4F-1AE5F57F9857} @@ -30,9 +35,4 @@ 4458;%(DisableSpecificWarnings) - - - ..\stdafx.h - - \ No newline at end of file diff --git a/plugins/SmileyAdd/src/download.cpp b/plugins/SmileyAdd/src/download.cpp index a305436364..7633dffab0 100644 --- a/plugins/SmileyAdd/src/download.cpp +++ b/plugins/SmileyAdd/src/download.cpp @@ -158,18 +158,13 @@ void __cdecl SmileyDownloadThread(void*) bool GetSmileyFile(CMStringW &url, const CMStringW &packstr) { - _TPattern *urlsplit = _TPattern::compile(L".*/(.*)"); - _TMatcher *m0 = urlsplit->createWCMatcher(url); - - m0->findFirstMatch(); + MRegexp16 urlsplit(L".*/(.*)"); + urlsplit.match(url); CMStringW filename; 
filename.AppendFormat(L"%s\\%s\\", cachepath, packstr.c_str()); int pathpos = filename.GetLength(); - filename += m0->getGroup(1); - - delete m0; - delete urlsplit; + filename += urlsplit.getGroup(1); bool needext = filename.Find('.') == -1; if (needext) diff --git a/plugins/SmileyAdd/src/regexp/WCMatcher.cpp b/plugins/SmileyAdd/src/regexp/WCMatcher.cpp deleted file mode 100644 index 12c1c0d4bd..0000000000 --- a/plugins/SmileyAdd/src/regexp/WCMatcher.cpp +++ /dev/null @@ -1,181 +0,0 @@ -#include "../stdafx.h" - -const int WCMatcher::MATCH_ENTIRE_STRING = 0x01; - -/* - Detailed documentation is provided in this class' header file - - @author Jeffery Stuart - @since November 2004 - @version 1.07.00 - */ - -WCMatcher::WCMatcher(WCPattern *pattern, const CMStringW &text) -{ - pat = pattern; - str = &text; - gc = pattern->groupCount; - ncgc = -pattern->nonCapGroupCount; - flags = 0; - matchedSomething = false; - starts = new int[gc + ncgc]; - ends = new int[gc + ncgc]; - groups = new int[gc + ncgc]; - groupPos = new int[gc + ncgc]; - groupIndeces = new int[gc + ncgc]; - starts = starts + ncgc; - ends = ends + ncgc; - groups = groups + ncgc; - groupPos = groupPos + ncgc; - groupIndeces = groupIndeces + ncgc; - for (int i = 0; i < gc; ++i) starts[i] = ends[i] = 0; -} - -WCMatcher::~WCMatcher() -{ - delete[](starts - ncgc); - delete[](ends - ncgc); - delete[](groups - ncgc); - delete[](groupIndeces - ncgc); - delete[](groupPos - ncgc); -} - -void WCMatcher::clearGroups() -{ - int i; - lm = 0; - for (i = 0; i < gc; ++i) groups[i] = starts[i] = ends[i] = -1; - for (i = 1; i <= ncgc; ++i) groups[0 - i] = starts[0 - i] = ends[0 - i] = -1; -} - -CMStringW WCMatcher::replaceWithGroups(const CMStringW &pStr) -{ - CMStringW ret; - - CMStringW t = pStr; - while (t.GetLength() > 0) { - if (t[0] == '\\') { - t.Delete(0); - if (t.GetLength() == 0) - ret += L"\\"; - else if (t[0] < '0' || t[0] > '9') { - ret += t[0]; - t.Delete(0); - } - else { - int gn = 0; - while (t.GetLength() > 0 
&& t[0] >= '0' && t[0] <= '9') - { - gn = gn * 10 + (t[0] - '0'); - t.Delete(0); - } - ret += getGroup(gn); - } - } - else { - ret += t[0]; - t.Delete(0); - } - } - - return ret; -} - -unsigned long WCMatcher::getFlags() const -{ - return flags; -} - -const CMStringW& WCMatcher::getText() const -{ - return *str; -} - -bool WCMatcher::matches() -{ - flags = MATCH_ENTIRE_STRING; - matchedSomething = false; - clearGroups(); - lm = 0; - return pat->head->match(*str, this, 0) == str->GetLength(); -} - -bool WCMatcher::findFirstMatch() -{ - starts[0] = 0; - flags = 0; - clearGroups(); - start = 0; - lm = 0; - ends[0] = pat->head->match(*str, this, 0); - if (ends[0] >= 0) { - matchedSomething = true; - return 1; - } - return 0; -} - -bool WCMatcher::findNextMatch() -{ - int s = starts[0], e = ends[0]; - - if (!matchedSomething) return findFirstMatch(); - if (s == e) ++e; - flags = 0; - clearGroups(); - - starts[0] = e; - if (e >= str->GetLength()) return 0; - start = e; - lm = e; - ends[0] = pat->head->match(*str, this, e); - return ends[0] >= 0; -} - -std::vector WCMatcher::findAll() -{ - std::vector ret; - reset(); - while (findNextMatch()) - ret.push_back(getGroup()); - - return ret; -} - -void WCMatcher::reset() -{ - lm = 0; - clearGroups(); - matchedSomething = false; -} - -int WCMatcher::getStartingIndex(const int groupNum) const -{ - if (groupNum < 0 || groupNum >= gc) - return -1; - return starts[groupNum]; -} - -int WCMatcher::getEndingIndex(const int groupNum) const -{ - if (groupNum < 0 || groupNum >= gc) - return -1; - return ends[groupNum]; -} - -CMStringW WCMatcher::getGroup(const int groupNum) const -{ - if (groupNum < 0 || groupNum >= gc) return L""; - if (starts[groupNum] < 0 || ends[groupNum] < 0) return L""; - return str->Mid(starts[groupNum], ends[groupNum] - starts[groupNum]); -} - -std::vector WCMatcher::getGroups(const bool includeGroupZero) const -{ - std::vector ret; - - for (int i = (includeGroupZero ? 
0 : 1); i < gc; ++i) - ret.push_back(getGroup(i)); - - return ret; -} diff --git a/plugins/SmileyAdd/src/regexp/WCMatcher.h b/plugins/SmileyAdd/src/regexp/WCMatcher.h deleted file mode 100644 index 285a49c349..0000000000 --- a/plugins/SmileyAdd/src/regexp/WCMatcher.h +++ /dev/null @@ -1,230 +0,0 @@ -#ifndef __WCMATCHER_H__ -#define __WCMATCHER_H__ - -/** - A matcher is a non thread-safe object used to scan strings using a given - {@link WCPattern WCPattern} object. Using a WCMatcher is the preferred - method for scanning strings. WCMatchers are not thread-safe. WCMatchers require - very little dynamic memory, hence one is encouraged to create several - instances of a matcher when necessary as opposed to sharing a single instance - of a matcher. -

- The most common methods needed by the matcher are matches, - findNextMatch, and getGroup. matches - and findNextMatch both return success or failure, and further - details can be gathered from their documentation. -

- Unlike Java's WCMatcher, this class allows you to change the string - you are matching against. This provides a small optimization, since you no - longer need multiple matchers for a single pattern in a single thread. -

- This class also provides an extremely handy method for replacing text with - captured data via the replaceWithGroups method. A typical - invocation looks like: -

-  wchar_t buf[10000];
-  CMStringW str = "\\5 (user name \\1) uses \\7 for his/her shell and \\6 is their home directory";
-  FILE * fp = fopen("/etc/passwd", "r");
-  WCPattern::registerWCPattern("entry", "[^:]+");
-  WCPattern * p = WCPattern::compile("^({entry}):({entry}):({entry}):({entry}):({entry}):({entry}):({entry})$",
-                                 WCPattern::MULTILINE_MATCHING | WCPattern::UNIX_LINE_MODE);
-  WCMatcher * m = p->createWCMatcher("");
-  while (fgets(buf, 9999, fp))
-  {
-    m->setString(buf);
-    if (m->matches())
-    {
-      printf("%s\n", m->replaceWithGroups(str).c_str());
-    }
-  }
-  fclose(fp);
-
-  
- Calling any of the following functions before first calling - matches, findFirstMatch, or - findNextMatch results in undefined behavior and may cause your - program to crash. - -
    -
  • replaceWithGroups -
  • getStartingIndex
  • -
  • getEndingIndex
  • -
  • getGroup
  • -
  • getGroups
  • -
- -

- The function findFirstMatch will attempt to find the first match - in the input string. The same results can be obtained by first calling - reset followed by findNextMatch. -

- To eliminate the necessity of looping through a string to find all the - matching substrings, findAll was created. The function will find - all matching substrings and return them in a vector. If you need - to examine specific capture groups within the substrings, then this method - should not be used. - - @author Jeffery Stuart - @since March 2003, Stable Since November 2004 - @version 1.05.00 - @memo Mutable object used on instances of a WCPattern class - */ -class WCMatcher -{ - friend class NFAUNode; - friend class NFAStartUNode; - friend class NFAEndUNode; - friend class NFAGroupHeadUNode; - friend class NFAGroupLoopUNode; - friend class NFAGroupLoopPrologueUNode; - friend class NFAGroupTailUNode; - friend class NFALookBehindUNode; - friend class NFAStartOfLineUNode; - friend class NFAEndOfLineUNode; - friend class NFAEndOfMatchUNode; - friend class NFAReferenceUNode; - friend class WCPattern; - private: - /** - Creates a new matcher object against text using - pattern. - - @param pattern The pattern with which to search - @param text The text in which to search - */ - WCMatcher(WCPattern *pattern, const CMStringW & ext); - protected: - /// The pattern we use to match - WCPattern *pat; - /// The string in which we are matching - const CMStringW *str; - /// The starting point of our match - int start; - /// An array of the starting positions for each group - int *starts; - /// An array of the ending positions for each group - int *ends; - /// An array of private data used by NFAUNodes during matching - int *groups; - /// An array of private data used by NFAUNodes during matching - int *groupIndeces; - /// An array of private data used by NFAUNodes during matching - int *groupPos; - /// The ending index of the last match - int lm; - /// The number of capturing groups we have - int gc; - /// The number of non-capturing groups we havew - int ncgc; - /// Whether or not we have matched something (used only by findFirstMatch and findNextMatch) - int 
matchedSomething; - /// The flags with which we were made - unsigned long flags; - /// Called by reset to clear the group arrays - void clearGroups(); - public: - /// Used internally by match to signify we want the entire string matched - const static int MATCH_ENTIRE_STRING; - public: - /// Cleans up the dynamic memory used by this matcher - ~WCMatcher(); - /** - Replaces the contents of str with the appropriate captured - text. str should have at least one back reference, otherwise - this function does nothing. - @param str The string in which to replace text - @return A string with all backreferences appropriately replaced - */ - CMStringW replaceWithGroups(const CMStringW & str); - /** - The flags currently being used by the matcher. - @return Zero - */ - unsigned long getFlags() const; - /** - The text being searched by the matcher. - @return the text being searched by the matcher. - */ - const CMStringW& getText() const; - - /** - Scans the string from start to finish for a match. The entire string must - match for this function to return success. Group variables are - appropriately set and can be queried after this function returns. - - @return Success if and only if the entire string matches the pattern - */ - bool matches(); - /** - Scans the string for the first substring matching the pattern. The entire - string does not necessarily have to match for this function to return - success. Group variables are appropriately set and can be queried after - this function returns. - - @return Success if any substring matches the specified pattern - */ - bool findFirstMatch(); - /** - Scans the string for the next substring matching the pattern. If no calls - have been made to findFirstMatch of findNextMatch since the last call to - reset, matches, or setString, then this function's behavior results to - that of findFirstMatch. 
- - @return Success if another substring can be found that matches the pattern - */ - bool findNextMatch(); - /** - Returns a vector of every substring in order which matches the given - pattern. - - @return Every substring in order which matches the given pattern - */ - std::vector findAll(); - /** - Resets the internal state of the matcher - */ - void reset(); - /** - Same as getText. Left n for backwards compatibilty with old source code - @return Returns the string that is currently being used for matching - */ - inline const CMStringW& getString() const { return *str; } - /** - Sets the string to scan - @param newStr The string to scan for subsequent matches - */ - inline void setString(const CMStringW & newStr) { str = &newStr; reset(); } - - /** - Returns the starting index of the specified group. - @param groupNum The group to query - @return The starting index of the group if it was matched, -1 for an - invalid group or if the group was not matched - */ - int getStartingIndex(const int groupNum = 0) const; - /** - Returns the ending index of the specified group. - @param groupNum The group to query - @return The ending index of the group if it was matched, -1 for an - invalid group or if the group was not matched - */ - int getEndingIndex(const int groupNum = 0) const; - /** - Returns the specified group. An empty string ("") does not necessarily - mean the group was not matched. A group such as (a*b?) could be matched by - a zero length. If an empty string is returned, getStartingIndex can be - called to determine if the group was actually matched. 
- @param groupNum The group to query - @return The text of the group - */ - CMStringW getGroup(const int groupNum = 0) const; - /** - Returns every capture group in a vector - - @param includeGroupZero Whether or not include capture group zero - @return Every capture group - */ - std::vector getGroups(const bool includeGroupZero = 0) const; -}; - -#endif diff --git a/plugins/SmileyAdd/src/regexp/WCPattern.cpp b/plugins/SmileyAdd/src/regexp/WCPattern.cpp deleted file mode 100644 index e2b99b6f3e..0000000000 --- a/plugins/SmileyAdd/src/regexp/WCPattern.cpp +++ /dev/null @@ -1,1668 +0,0 @@ -/** - From the author (Jeff Stuart) - " - Let me start by saying this file is pretty big. If you feel up to it, you can - try making changes yourself, but you would be better off to just email me at - stuart@cs.ucdavis.edu if you think there is a bug, or have something useful you - would like added. This project is very "near and dear" to me, so I am fairly quick - to make bug fixes. The header files for WCPattern and WCMatcher are fairly well - documented and the function names are pretty self-explanatory, but if you are having - any trouble, feel free to email me at stuart@cs.ucdavis.edu. - - If you email me, make sure you put something like C++RE in the subject because - I tend to delete email if I don't recognize the name and the subject is - something like "I Need Your Help" or "Got A Second" or "I Found It". 
- " - */ - -/* - Detailed documentation is provided in this class' header file - - @author Jeffery Stuart - @since November 2004 - @version 1.07.00 - */ - -#include "../stdafx.h" - -std::map WCPattern::compiledWCPatterns; -std::map > WCPattern::registeredWCPatterns; - -const int WCPattern::MIN_QMATCH = 0x00000000; -const int WCPattern::MAX_QMATCH = 0x7FFFFFFF; - -const unsigned long WCPattern::CASE_INSENSITIVE = 0x01; -const unsigned long WCPattern::LITERAL = 0x02; -const unsigned long WCPattern::DOT_MATCHES_ALL = 0x04; -const unsigned long WCPattern::MULTILINE_MATCHING = 0x08; -const unsigned long WCPattern::UNIX_LINE_MODE = 0x10; - -#define to_lower(a) (wchar_t)(UINT_PTR)CharLowerW((LPWSTR)(unsigned)a) -#define is_alpha IsCharAlphaW - -#if defined(_WIN32) -#define str_icmp mir_wstrcmpi -#elif defined(__CYGWIN__) || defined(__APPLE__) -#include -static inline int str_icmp(const wchar_t *a, const wchar_t *b) -{ - while (*a && *b) { - const int t = (int)towlower(*a) - (int)tolower(*b); - if (t) return t; - ++a; ++b; - } - if (*a) { - if (*b) return (int)towlower(*a) - (int)tolower(*b); - return 1; - } - else if (*b) return 1; - return 0; -} -#else -#define str_icmp wcscasecmp -#endif - -WCPattern::WCPattern(const CMStringW &rhs) -{ - matcher = nullptr; - pattern = rhs; - curInd = 0; - groupCount = 0; - nonCapGroupCount = 0; - error = 0; - head = nullptr; -} - -// convenient function in case we want to add any extra debugging output -void WCPattern::raiseError() -{ - error = 1; -} - -NFAUNode *WCPattern::registerNode(NFAUNode *node) -{ - nodes[node] = 1; - return node; -} - -CMStringW WCPattern::classUnion(CMStringW s1, CMStringW s2) const -{ - wchar_t *out = new wchar_t[66000]; - std::sort((LPTSTR)s1.GetString(), (LPTSTR)s1.GetTail()); - std::sort((LPTSTR)s2.GetString(), (LPTSTR)s2.GetTail()); - wchar_t *p = std::set_union(s1.GetString(), s1.GetTail(), s2.GetString(), s2.GetTail(), out); *p = 0; - CMStringW ret = out; - delete[] out; - return ret; -} - -CMStringW 
WCPattern::classIntersect(CMStringW s1, CMStringW s2) const -{ - wchar_t *out = new wchar_t[66000]; - std::sort((LPTSTR)s1.GetString(), (LPTSTR)s1.GetTail()); - std::sort((LPTSTR)s2.GetString(), (LPTSTR)s2.GetTail()); - *std::set_intersection(s1.GetString(), s1.GetTail(), s2.GetString(), s2.GetTail(), out) = 0; - CMStringW ret = out; - delete[] out; - return ret; -} - -CMStringW WCPattern::classNegate(CMStringW s1) const -{ - wchar_t *out = new wchar_t[66000]; - int i, ind = 0; - std::map m; - - for (i = 0; i < s1.GetLength(); ++i) m[s1[i]] = 1; - for (i = 0xFF; i >= 0; --i) if (m.find((wchar_t)i) == m.end()) out[ind++] = (wchar_t)i; - out[ind] = 0; - CMStringW ret(out, ind); - delete[] out; - return ret; -} - -CMStringW WCPattern::classCreateRange(wchar_t low, wchar_t hi) const -{ - wchar_t out[300]; - int ind = 0; - while (low != hi) out[ind++] = low++; - out[ind++] = low; - return CMStringW(out, ind); -} - -int WCPattern::getInt(int start, int end) -{ - int ret = 0; - for (; start <= end; ++start) ret = ret * 10 + (pattern[start] - '0'); - return ret; -} - -bool WCPattern::quantifyCurly(int &sNum, int &eNum) -{ - bool good = 1; - int i, ci = curInd + 1; - int commaInd = ci, endInd = ci, len = pattern.GetLength(); - sNum = eNum = 0; - - while (endInd < len && pattern[endInd] != '}') ++endInd; - while (commaInd < endInd && pattern[commaInd] != ',') ++commaInd; - if (endInd >= len) { raiseError(); return 0; } - for (i = ci; good && i < endInd; ++i) if (i != commaInd && !isdigit(pattern[i])) good = 0; - if (!good && commaInd < endInd) { raiseError(); return 0; } - if (!good) return 0; - /* so now everything in here is either a comma (and there is at most one comma) or a digit */ - if (commaInd == ci) // {,*} - { - if (endInd == commaInd + 1) { sNum = MIN_QMATCH; eNum = MAX_QMATCH; } // {,} = * - else { sNum = MIN_QMATCH; eNum = getInt(commaInd + 1, endInd - 1); } // {,+} - } - else if (commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH; } 
// {+,} - else if (commaInd == endInd) { sNum = getInt(ci, endInd - 1); eNum = sNum; } // {+} - else { sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1); } // {+,+} - curInd = endInd + 1; - return 1; -} - -NFAUNode* WCPattern::quantifyGroup(NFAUNode *start, NFAUNode *stop, const int gn) -{ - NFAUNode *newNode = nullptr; - int type = 0; - - if (curInd < pattern.GetLength()) { - wchar_t ch = (curInd + 1 >= pattern.GetLength()) ? USHRT_MAX : pattern[curInd + 1]; - switch (pattern[curInd]) { - case '*': - ++curInd; - switch (ch) { - case '?': ++curInd; type = 1; break; - case '+': ++curInd; type = 2; break; - } - newNode = registerNode(new NFAGroupLoopPrologueUNode(gn)); - newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, MAX_QMATCH, gn, type)); - stop->next = newNode->next; - return newNode; - case '?': - ++curInd; - switch (ch) { - case '?': ++curInd; type = 1; break; - case '+': ++curInd; type = 2; break; - } - newNode = registerNode(new NFAGroupLoopPrologueUNode(gn)); - newNode->next = registerNode(new NFAGroupLoopUNode(start, MIN_QMATCH, 1, gn, type)); - stop->next = newNode->next; - return newNode; - case '+': - ++curInd; - switch (ch) { - case '?': ++curInd; type = 1; break; - case '+': ++curInd; type = 2; break; - } - newNode = registerNode(new NFAGroupLoopPrologueUNode(gn)); - newNode->next = registerNode(new NFAGroupLoopUNode(start, 1, MAX_QMATCH, gn, type)); - stop->next = newNode->next; - return newNode; - case '{': - { - int s, e; - if (quantifyCurly(s, e)) { - ch = (curInd < pattern.GetLength()) ? 
pattern[curInd] : USHRT_MAX; - switch (ch) { - case '?': ++curInd; type = 1; break; - case '+': ++curInd; type = 2; break; - } - newNode = registerNode(new NFAGroupLoopPrologueUNode(gn)); - newNode->next = registerNode(new NFAGroupLoopUNode(start, s, e, gn, type)); - stop->next = newNode->next; - return newNode; - } - } - } - } - return nullptr; -} - -NFAUNode* WCPattern::quantify(NFAUNode *newNode) -{ - if (curInd < pattern.GetLength()) { - wchar_t ch = (curInd + 1 >= pattern.GetLength()) ? USHRT_MAX : pattern[curInd + 1]; - switch (pattern[curInd]) { - case '*': - ++curInd; - switch (ch) { - case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break; - case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break; - default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break; - } - break; - case '?': - ++curInd; - switch (ch) { - case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break; - case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break; - default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, MIN_QMATCH, 1)); break; - } - break; - case '+': - ++curInd; - switch (ch) { - case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break; - case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break; - default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, 1, MAX_QMATCH)); break; - } - break; - case '{': - int s, e; - if (quantifyCurly(s, e)) { - ch = (curInd < pattern.GetLength()) ? 
pattern[curInd] : USHRT_MAX; - switch (ch) { - case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierUNode(this, newNode, s, e)); break; - case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierUNode(this, newNode, s, e)); break; - default: newNode = registerNode(new NFAGreedyQuantifierUNode(this, newNode, s, e)); break; - } - } - break; - } - } - return newNode; -} - -CMStringW WCPattern::parseClass() -{ - CMStringW t, ret; - wchar_t ch, c1, c2; - bool inv = 0, neg = 0, quo = 0; - - if (curInd < pattern.GetLength() && pattern[curInd] == '^') { - ++curInd; - neg = 1; - } - - while (curInd < pattern.GetLength() && pattern[curInd] != ']') { - ch = pattern[curInd++]; - if (ch == '[') { - t = parseClass(); - ret = classUnion(ret, t); - } - else if (ch == '&' && curInd < pattern.GetLength() && pattern[curInd] == '&') { - if (pattern[++curInd] != '[') { - raiseError(); - curInd = pattern.GetLength(); - } - else { - ++curInd; - t = parseClass(); - ret = classIntersect(ret, t); - } - } - else if (ch == '\\') { - t = parseEscape(inv, quo); - if (quo) { - raiseError(); - curInd = pattern.GetLength(); - } - else if (inv || t.GetLength() > 1) { // cant be part of a range (a-z) - if (inv) t = classNegate(t); - ret = classUnion(ret, t); - } - else if (curInd < pattern.GetLength() && pattern[curInd] == '-') { // part of a range (a-z) - c1 = t[0]; - ++curInd; - if (curInd >= pattern.GetLength()) raiseError(); - else { - c2 = pattern[curInd++]; - if (c2 == '\\') { - t = parseEscape(inv, quo); - if (quo) { - raiseError(); - curInd = pattern.GetLength(); - } - else if (inv || t.GetLength() > 1) raiseError(); - else ret = classUnion(ret, classCreateRange(c1, c2)); - } - else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') { - raiseError(); - curInd = pattern.GetLength(); - } - else ret = classUnion(ret, classCreateRange(c1, c2)); - } - } - else ret = classUnion(ret, t); - } - else if (curInd < pattern.GetLength() && pattern[curInd] == '-') { - c1 = ch; - 
++curInd; - if (curInd >= pattern.GetLength()) raiseError(); - else { - c2 = pattern[curInd++]; - if (c2 == '\\') { - t = parseEscape(inv, quo); - if (quo) { - raiseError(); - curInd = pattern.GetLength(); - } - else if (inv || t.GetLength() > 1) raiseError(); - else ret = classUnion(ret, classCreateRange(c1, c2)); - } - else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') { - raiseError(); - curInd = pattern.GetLength(); - } - else ret = classUnion(ret, classCreateRange(c1, c2)); - } - } - else ret.AppendChar(ch); - } - - if (curInd >= pattern.GetLength() || pattern[curInd] != ']') { - raiseError(); - ret = L""; - } - else { - ++curInd; - if (neg) ret = classNegate(ret); - } - return ret; -} - -CMStringW WCPattern::parsePosix() -{ - CMStringW s7 = pattern.Mid(curInd, 7); - if (s7 == L"{Lower}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyz"; } - if (s7 == L"{Upper}") { curInd += 7; return L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; } - if (s7 == L"{Alpha}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; } - if (s7 == L"{Digit}") { curInd += 7; return L"0123456789"; } - if (s7 == L"{Alnum}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; } - if (s7 == L"{Punct}") { curInd += 7; return L"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } - if (s7 == L"{Graph}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } - if (s7 == L"{Print}") { curInd += 7; return L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } - if (s7 == L"{Blank}") { curInd += 7; return L" \t"; } - if (s7 == L"{Space}") { curInd += 7; return L" \t\n\x0B\f\r"; } - if (s7 == L"{Cntrl}") { - CMStringW s(' ', 32 + 1); - for (int i = 0; i <= 0x1F; ++i) - s.SetAt(i, i); - s.SetAt(0x20, 0x7F); - curInd += 7; - return s; - } - if (s7 == L"{ASCII}") { - CMStringW s(' ', 0x80); - for (int i = 0; i <= 0x7f; ++i) s.SetAt(i, i); - 
curInd += 7; - return s; - } - if (pattern.Mid(curInd, 8) == L"{XDigit}") { curInd += 8; return L"abcdefABCDEF0123456789"; } - raiseError(); - return L""; -} - -NFAUNode* WCPattern::parseBackref() -{ - #define is_dig(x) ((x) >= '0' && (x) <= '9') - #define to_int(x) ((x) - '0') - int ci = curInd; - int oldRef = 0, ref = 0; - - while (ci < pattern.GetLength() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount)) { - oldRef = ref; - ref = ref * 10 + to_int(pattern[ci++]); - } - if (ci == pattern.GetLength()) { - oldRef = ref; - ++ci; - } - if (oldRef < 0 || ci <= curInd) { - raiseError(); - return registerNode(new NFAReferenceUNode(-1)); - } - curInd = ci; - return registerNode(new NFAReferenceUNode(ref)); - - #undef is_dig - #undef to_int -} - -CMStringW WCPattern::parseOctal() -{ - #define islowoc(x) ((x) >= '0' && (x) <= '3') - #define isoc(x) ((x) >= '0' && (x) <= '7') - #define fromoc(x) ((x) - '0') - int ci = curInd; - wchar_t ch1 = (ci + 0 < pattern.GetLength()) ? pattern[ci + 0] : USHRT_MAX; - wchar_t ch2 = (ci + 1 < pattern.GetLength()) ? pattern[ci + 1] : USHRT_MAX; - wchar_t ch3 = (ci + 2 < pattern.GetLength()) ? pattern[ci + 2] : USHRT_MAX; - CMStringW s = L" "; - - if (islowoc(ch1) && isoc(ch2)) { - curInd += 2; - s.SetAt(0, fromoc(ch1) * 8 + fromoc(ch2)); - if (isoc(ch3)) { - ++curInd; - s.SetAt(0, s[0] * 8 + fromoc(ch3)); - } - } - else if (isoc(ch1) && isoc(ch2)) { - curInd += 2; - s.SetAt(0, fromoc(ch1) * 8 + fromoc(ch2)); - } - else raiseError(); - - return s; - #undef islowoc - #undef isoc - #undef fromoc -} - -CMStringW WCPattern::parseHex() -{ - #define to_low(x) (((x) >= 'A' && (x) <= 'Z') ? ((x) - 'A' + 'a') : (x)) - #define is_dig(x) ((x) >= '0' && (x) <= '9') - #define is_hex(x) (is_dig(x) || (to_low(x) >= 'a' && to_low(x) <= 'f')) - #define to_int(x) ((is_dig(x)) ? ((x) - '0') : (to_low(x) - 'a' + 10)) - - int ci = curInd; - wchar_t ch1 = (ci + 0 < pattern.GetLength()) ? 
pattern[ci + 0] : USHRT_MAX; - wchar_t ch2 = (ci + 1 < pattern.GetLength()) ? pattern[ci + 1] : USHRT_MAX; - wchar_t ch3 = (ci + 2 < pattern.GetLength()) ? pattern[ci + 2] : USHRT_MAX; - wchar_t ch4 = (ci + 3 < pattern.GetLength()) ? pattern[ci + 3] : USHRT_MAX; - CMStringW s = L" "; - - if (is_hex(ch1) && is_hex(ch2) && is_hex(ch3) && is_hex(ch4)) { - curInd += 2; - s.SetAt(0, (to_int(ch1) << 12 & 0xF000) | (to_int(ch2) << 8 & 0x0F00) | - (to_int(ch3) << 4 & 0x0F00) | (to_int(ch4) & 0x000F)); - } - else if (is_hex(ch1) && is_hex(ch2)) { - curInd += 2; - s.SetAt(0, (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F)); - } - - return s; - #undef to_low - #undef is_dig - #undef is_hex - #undef to_int -} - -CMStringW WCPattern::parseEscape(bool &inv, bool &quo) -{ - wchar_t ch = pattern[curInd++]; - CMStringW classes; - - if (curInd > pattern.GetLength()) { - raiseError(); - return ""; - } - - quo = 0; - inv = 0; - switch (ch) { - case 'p': classes = parsePosix(); break; - case 'P': classes = L"!!"; classes += parsePosix(); break; - case 'd': classes = L"0123456789"; break; - case 'D': classes = L"!!0123456789"; break; - case 's': classes = L" \t\r\n\f"; break; - case 'S': classes = L"!! 
\t\r\n\f"; break; - case 'w': classes = L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break; - case 'W': classes = L"!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break; - case '0': classes = parseOctal(); break; - case 'x': classes = parseHex(); break; - - case 'Q': quo = 1; break; - case 't': classes = L"\t"; break; - case 'r': classes = L"\r"; break; - case 'n': classes = L"\n"; break; - case 'f': classes = L"\f"; break; - case 'a': classes = L"\a"; break; - case 'e': classes = L"\r"; break; - default: - classes.AppendChar(ch); - break; - } - - if (classes.Mid(0, 2) == L"!!") { - classes = classes.Mid(2); - inv = 1; - } - return classes; -} - -NFAUNode* WCPattern::parseRegisteredWCPattern(NFAUNode **end) -{ - int i, j; - CMStringW s; - NFAUNode *ret = nullptr; - for (i = curInd; i < pattern.GetLength() && pattern[i] != '}'; ++i) {} - if (pattern[i] != '}') { raiseError(); return nullptr; } - if (i == curInd + 1) { raiseError(); return nullptr; } // {} - if (!((pattern[curInd] >= 'a' && pattern[curInd] <= 'z') || - (pattern[curInd] >= 'A' && pattern[curInd] <= 'Z') || - (pattern[curInd] == '_'))) { - raiseError(); - return nullptr; - } - - for (j = curInd; !error && j < i; ++j) { - if (!((pattern[j] >= 'a' && pattern[j] <= 'z') || - (pattern[j] >= 'A' && pattern[j] <= 'Z') || - (pattern[j] >= '0' && pattern[j] <= '9') || - (pattern[j] == '_'))) { - raiseError(); - return nullptr; - } - } - s = pattern.Mid(curInd, i - curInd); - if (registeredWCPatterns.find(s) == registeredWCPatterns.end()) raiseError(); - else { - unsigned long oflags = flags; - CMStringW op = pattern; - int ci = i + 1; - - pattern = registeredWCPatterns[s].first; - curInd = 0; - flags = registeredWCPatterns[s].second; - - --groupCount; - ret = parse(0, 0, end); - - pattern = op; - curInd = ci; - flags = oflags; - } - if (error) { *end = ret = nullptr; } - return ret; -} - -// look behind should interpret everything as a literal (except \\) since the 
-// pattern must have a concrete length -NFAUNode* WCPattern::parseBehind(const bool pos, NFAUNode **end) -{ - CMStringW t; - while (curInd < pattern.GetLength() && pattern[curInd] != ')') { - wchar_t ch = pattern[curInd++]; - if (ch == '\\') { - if (curInd + 1 >= pattern.GetLength()) { - raiseError(); - return *end = registerNode(new NFACharUNode(' ')); - } - ch = pattern[curInd++]; - } - t.AppendChar(ch); - } - if (curInd >= pattern.GetLength() || pattern[curInd] != ')') raiseError(); - else ++curInd; - return *end = registerNode(new NFALookBehindUNode(t, pos)); -} - -NFAUNode* WCPattern::parseQuote() -{ - bool done = 0; - CMStringW s; - - while (!done) { - if (curInd >= pattern.GetLength()) { - raiseError(); - done = 1; - } - else if (pattern.Mid(curInd, 2) == L"\\E") { - curInd += 2; - done = 1; - } - else if (pattern[curInd] == '\\') { - s.AppendChar(pattern[++curInd]); - ++curInd; - } - else s.AppendChar(pattern[curInd++]); - } - if ((flags & WCPattern::CASE_INSENSITIVE) != 0) - return registerNode(new NFACIQuoteUNode(s)); - return registerNode(new NFAQuoteUNode(s)); -} -NFAUNode* WCPattern::parse(const bool inParen, const bool inOr, NFAUNode **end) -{ - NFAUNode *start, *cur, *next = nullptr; - CMStringW t; - int grc = groupCount++; - bool inv, quo; - bool ahead = 0, pos = 0, noncap = 0, indep = 0; - unsigned long oldFlags = flags; - - if (inParen) { - if (pattern[curInd] == '?') { - ++curInd; - --groupCount; - if (pattern[curInd] == ':') { noncap = 1; ++curInd; grc = --nonCapGroupCount; } - else if (pattern[curInd] == '=') { ++curInd; ahead = 1; pos = 1; } - else if (pattern[curInd] == '!') { ++curInd; ahead = 1; pos = 0; } - else if (pattern.Mid(curInd, 2) == L"<=") { curInd += 2; return parseBehind(1, end); } - else if (pattern.Mid(curInd, 2) == L"') { ++curInd; indep = 1; } - else { - bool negate = false, done = false; - while (!done) { - if (curInd >= pattern.GetLength()) { - raiseError(); - return nullptr; - } - else if (negate) { - switch 
(pattern[curInd]) { - case 'i': flags &= ~WCPattern::CASE_INSENSITIVE; break; - case 'd': flags &= ~WCPattern::UNIX_LINE_MODE; break; - case 'm': flags &= ~WCPattern::MULTILINE_MATCHING; break; - case 's': flags &= ~WCPattern::DOT_MATCHES_ALL; break; - case ':': done = true; break; - case ')': - ++curInd; - *end = registerNode(new NFALookBehindUNode(L"", true)); - return *end; - case '-': - default: - raiseError(); - return nullptr; - } - } - else { - switch (pattern[curInd]) { - case 'i': flags |= WCPattern::CASE_INSENSITIVE; break; - case 'd': flags |= WCPattern::UNIX_LINE_MODE; break; - case 'm': flags |= WCPattern::MULTILINE_MATCHING; break; - case 's': flags |= WCPattern::DOT_MATCHES_ALL; break; - case ':': done = true; break; - case '-': negate = true; break; - case ')': - ++curInd; - *end = registerNode(new NFALookBehindUNode(L"", true)); - return *end; - default: - raiseError(); - return nullptr; - } - } - ++curInd; - } - noncap = 1; - grc = --nonCapGroupCount; - } - - if (noncap) cur = start = registerNode(new NFAGroupHeadUNode(grc)); - else cur = start = registerNode(new NFASubStartUNode); - } - else cur = start = registerNode(new NFAGroupHeadUNode(grc)); - } - else cur = start = registerNode(new NFASubStartUNode); - - while (curInd < pattern.GetLength()) { - wchar_t ch = pattern[curInd++]; - - next = nullptr; - if (error) return nullptr; - switch (ch) { - case '^': - if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineUNode); - else next = registerNode(new NFAStartOfInputUNode); - break; - case '$': - if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineUNode); - else next = registerNode(new NFAEndOfInputUNode(0)); - break; - case '|': - --groupCount; - cur->next = registerNode(new NFAAcceptUNode); - cur = start = registerNode(new NFAOrUNode(start, parse(inParen, 1))); - break; - case '\\': - if (curInd < pattern.GetLength()) { - bool eoi = 0; - switch (pattern[curInd]) { - case '1': - 
case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': next = parseBackref(); break; - case 'A': ++curInd; next = registerNode(new NFAStartOfInputUNode); break; - case 'B': ++curInd; next = registerNode(new NFAWordBoundaryUNode(0)); break; - case 'b': ++curInd; next = registerNode(new NFAWordBoundaryUNode(1)); break; - case 'G': ++curInd; next = registerNode(new NFAEndOfMatchUNode); break; - case 'Z': eoi = 1; - case 'z': ++curInd; next = registerNode(new NFAEndOfInputUNode(eoi)); break; - default: - t = parseEscape(inv, quo); - //printf("inv quo classes { %c %c %s }\n", inv ? 't' : 'f', quo ? 't' : 'f', t.c_str()); - if (!quo) { - if (t.GetLength() > 1 || inv) { - if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassUNode(t, inv)); - else next = registerNode(new NFAClassUNode(t, inv)); - } - else next = registerNode(new NFACharUNode(t[0])); - } - else next = parseQuote(); - } - } - else raiseError(); - break; - case '[': - if ((flags & WCPattern::CASE_INSENSITIVE) == 0) { - NFAClassUNode *clazz = new NFAClassUNode(); - CMStringW s = parseClass(); - for (int i = 0; i < (int)s.GetLength(); ++i) clazz->vals[s[i]] = 1; - next = registerNode(clazz); - } - else { - NFACIClassUNode *clazz = new NFACIClassUNode(); - CMStringW s = parseClass(); - for (int i = 0; i < s.GetLength(); ++i) clazz->vals[to_lower(s[i])] = 1; - next = registerNode(clazz); - } - break; - case '.': - { - bool useN = 1, useR = 1; - NFAClassUNode *clazz = new NFAClassUNode(1); - if ((flags & WCPattern::UNIX_LINE_MODE) != 0) useR = 0; - if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0; - if (useN) clazz->vals['\n'] = 1; - if (useR) clazz->vals['\r'] = 1; - next = registerNode(clazz); - } - break; - case '(': - { - NFAUNode *pEnd, *t1, *t2; - t1 = parse(1, 0, &pEnd); - if (!t1) raiseError(); - else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, pEnd, grc)) != nullptr) { - cur->next = t2; - cur = t2->next; - } - else 
{ - cur->next = t1; - cur = pEnd; - } - } - break; - case ')': - if (!inParen) raiseError(); - else if (inOr) { - --curInd; - cur = cur->next = registerNode(new NFAAcceptUNode); - flags = oldFlags; - return start; - } - else { - if (ahead) { - cur = cur->next = registerNode(new NFAAcceptUNode); - flags = oldFlags; - return *end = registerNode(new NFALookAheadUNode(start, pos)); - } - else if (indep) { - cur = cur->next = registerNode(new NFAAcceptUNode); - flags = oldFlags; - return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1)); - } - else { // capping or noncapping, it doesnt matter - *end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc)); - next = quantifyGroup(start, *end, grc); - if (next) { - start = next; - *end = next->next; - } - flags = oldFlags; - return start; - } - } - break; - case '{': // registered pattern - cur->next = parseRegisteredWCPattern(&next); - if (cur->next) cur = next; - break; - case '*': - case '+': - case '?': - // case '}': - // case ']': - raiseError(); - break; - default: - if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch)); - else next = registerNode(new NFACharUNode(ch)); - break; - } - if (next) cur = cur->next = quantify(next); - } - if (inParen) raiseError(); - else { - if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode); - if (end) *end = cur; - } - - flags = oldFlags; - if (error) return nullptr; - - return start; -} - -WCPattern* WCPattern::compile(const CMStringW &pattern, const unsigned long mode) -{ - WCPattern *p = new WCPattern(pattern); - NFAUNode *end; - - p->flags = mode; - if ((mode & WCPattern::LITERAL) != 0) { - p->head = p->registerNode(new NFAStartUNode); - if ((mode & WCPattern::CASE_INSENSITIVE) != 0) p->head->next = p->registerNode(new NFACIQuoteUNode(pattern)); - else p->head->next = p->registerNode(new NFAQuoteUNode(pattern)); - p->head->next->next = p->registerNode(new NFAEndUNode); - } - else { - p->head = p->parse(0, 
0, &end); - if (!p->head) { - delete p; - p = nullptr; - } - else { - if (!(p->head && p->head->isStartOfInputNode())) { - NFAUNode *n = p->registerNode(new NFAStartUNode); - n->next = p->head; - p->head = n; - } - end->next = p->registerNode(new NFAEndUNode); - } - } - - if (p != nullptr) - p->matcher = new WCMatcher(p, L""); - - return p; -} - -WCPattern* WCPattern::compileAndKeep(const CMStringW &pattern, const unsigned long mode) -{ - WCPattern *ret = nullptr; - std::map::iterator it = compiledWCPatterns.find(pattern); - if (it != compiledWCPatterns.end()) - ret = it->second; - else { - ret = compile(pattern, mode); - compiledWCPatterns[pattern] = ret; - } - - return ret; -} - -CMStringW WCPattern::replace(const CMStringW &pattern, const CMStringW &str, const CMStringW &replacementText, const unsigned long mode) -{ - CMStringW ret; - WCPattern *p = WCPattern::compile(pattern, mode); - if (p) { - ret = p->replace(str, replacementText); - delete p; - } - return ret; -} - -std::vector WCPattern::split(const CMStringW &pattern, const CMStringW &str, const bool keepEmptys, const unsigned long limit, const unsigned long mode) -{ - std::vector ret; - WCPattern *p = WCPattern::compile(pattern, mode); - if (p) { - ret = p->split(str, keepEmptys, limit); - delete p; - } - return ret; -} - -std::vector WCPattern::findAll(const CMStringW &pattern, const CMStringW &str, const unsigned long mode) -{ - std::vector ret; - WCPattern *p = WCPattern::compile(pattern, mode); - if (p) { - ret = p->findAll(str); - delete p; - } - return ret; -} - -bool WCPattern::matches(const CMStringW &pattern, const CMStringW &str, const unsigned long mode) -{ - bool ret = 0; - WCPattern *p = compile(pattern, mode); - if (p) { - ret = p->matches(str); - delete p; - } - - return ret; -} - -bool WCPattern::registerWCPattern(const CMStringW &name, const CMStringW &pattern, const unsigned long mode) -{ - WCPattern *p = WCPattern::compile(pattern, mode); - if (!p) - return 0; - - 
WCPattern::registeredWCPatterns[name] = std::make_pair(pattern, mode); - delete p; - return 1; -} - -void WCPattern::unregisterWCPatterns() -{ - registeredWCPatterns.clear(); -} - -void WCPattern::clearWCPatternCache() -{ - std::map::iterator it; - for (it = compiledWCPatterns.begin(); it != compiledWCPatterns.end(); ++it) - delete it->second; - - compiledWCPatterns.clear(); -} - -std::pair WCPattern::findNthMatch(const CMStringW &pattern, const CMStringW &str, - const int matchNum, const unsigned long mode) -{ - std::pair ret; - WCPattern *p = WCPattern::compile(pattern, mode); - - ret.second = -1; - if (p) { - int i = -1; - p->matcher->setString(str); - while (i < matchNum && p->matcher->findNextMatch()) { ++i; } - if (i == matchNum && p->matcher->getStartingIndex() >= 0) { - ret.first = p->matcher->getGroup(0); - ret.second = p->matcher->getStartingIndex(); - } - delete p; - } - - return ret; -} - -WCPattern::~WCPattern() -{ - if (matcher) delete matcher; - for (std::map::iterator it = nodes.begin(); it != nodes.end(); ++it) - delete it->first; -} - -CMStringW WCPattern::replace(const CMStringW &str, const CMStringW &replacementText) -{ - int li = 0; - CMStringW ret; - - matcher->setString(str); - while (matcher->findNextMatch()) { - ret += str.Mid(li, matcher->getStartingIndex() - li); - ret += matcher->replaceWithGroups(replacementText); - li = matcher->getEndingIndex(); - } - ret += str.Mid(li); - - return ret; -} - -std::vector WCPattern::split(const CMStringW &str, const bool keepEmptys, const unsigned long limit) -{ - unsigned long lim = (limit == 0 ? 
MAX_QMATCH : limit); - int li = 0; - std::vector ret; - - matcher->setString(str); - - while (matcher->findNextMatch() && ret.size() < lim) { - if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back(L""); - if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys) { - if (li != matcher->getStartingIndex() || keepEmptys) - ret.push_back(str.Mid(li, matcher->getStartingIndex() - li)); - - li = matcher->getEndingIndex(); - } - } - - if (li < str.GetLength()) - ret.push_back(str.Mid(li)); - - return ret; -} - -std::vector WCPattern::findAll(const CMStringW &str) -{ - matcher->setString(str); - return matcher->findAll(); -} - -bool WCPattern::matches(const CMStringW &str) -{ - matcher->setString(str); - return matcher->matches(); -} - -unsigned long WCPattern::getFlags() const -{ - return flags; -} - -CMStringW WCPattern::getWCPattern() const -{ - return pattern; -} - -WCMatcher *WCPattern::createWCMatcher(const CMStringW &str) -{ - return new WCMatcher(this, str); -} - -// NFAUNode - -NFAUNode::NFAUNode() { next = nullptr; } -NFAUNode::~NFAUNode() {} -void NFAUNode::findAllNodes(std::map &soFar) -{ - if (soFar.find(this) == soFar.end()) return; - soFar[this] = 1; - if (next) next->findAllNodes(soFar); -} - -// NFACharUNode - -NFACharUNode::NFACharUNode(const wchar_t c) { ch = c; } -int NFACharUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd < str.GetLength() && str[curInd] == ch) - return next->match(str, matcher, curInd + 1); - return -1; -} - -// NFACICharUNode - -NFACICharUNode::NFACICharUNode(const wchar_t c) { ch = to_lower(c); } -int NFACICharUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd < str.GetLength() && to_lower(str[curInd]) == ch) - return next->match(str, matcher, curInd + 1); - return -1; -} - -// NFAStartUNode - -NFAStartUNode::NFAStartUNode() {} -int NFAStartUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) 
const -{ - int ret = -1, ci = curInd; - - matcher->starts[0] = curInd; - if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) == (unsigned int)WCMatcher::MATCH_ENTIRE_STRING) { - if (curInd != 0) { - matcher->starts[0] = -1; - return -1; - } - return next->match(str, matcher, 0); - } - - while ((ret = next->match(str, matcher, ci)) == -1 && ci < str.GetLength()) { - matcher->clearGroups(); - matcher->starts[0] = ++ci; - } - - if (ret < 0) - matcher->starts[0] = -1; - return ret; -} - -// NFAEndUNode - -NFAEndUNode::NFAEndUNode() {} -int NFAEndUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - matcher->ends[0] = curInd; - if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) != 0) { - if (curInd == str.GetLength()) - return curInd; - - matcher->ends[0] = -1; - return -1; - } - return curInd; -} - -// NFAQuantifierUNode - -void NFAQuantifierUNode::findAllNodes(std::map &soFar) -{ - inner->findAllNodes(soFar); - NFAUNode::findAllNodes(soFar); -} - -NFAQuantifierUNode::NFAQuantifierUNode(WCPattern *pat, NFAUNode *internal, const int minMatch, const int maxMatch) -{ - inner = internal; - inner->next = pat->registerNode(new NFAAcceptUNode); - min = (minMatch < WCPattern::MIN_QMATCH) ? WCPattern::MIN_QMATCH : minMatch; - max = (maxMatch > WCPattern::MAX_QMATCH) ? 
WCPattern::MAX_QMATCH : maxMatch; -} - -int NFAQuantifierUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int i0, i1, i2 = 0; - - i0 = i1 = curInd; - while (i2 < min) { - ++i2; - i1 = inner->match(str, matcher, i0); - if (i1 <= i0) return i1; // i1 < i0 means i1 is -1 - i0 = i1; - } - - return i1; -} -// NFAGreedyQuantifierUNode - -NFAGreedyQuantifierUNode::NFAGreedyQuantifierUNode(WCPattern *pat, NFAUNode *internal, const int minMatch, const int maxMatch) : - NFAQuantifierUNode(pat, internal, minMatch, maxMatch) -{ -} - -int NFAGreedyQuantifierUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int t = NFAQuantifierUNode::match(str, matcher, curInd); - if (t != -1) return matchInternal(str, matcher, t, min); - return t; -} - -int NFAGreedyQuantifierUNode::matchInternal(const CMStringW &str, WCMatcher *matcher, const int curInd, const int soFar) const -{ - if (soFar >= max) - return next->match(str, matcher, curInd); - - int i = inner->match(str, matcher, curInd); - if (i != -1) { - int j = matchInternal(str, matcher, i, soFar + 1); - if (j != -1) - return j; - } - return next->match(str, matcher, curInd); -} - -// NFALazyQuantifierUNode - -NFALazyQuantifierUNode::NFALazyQuantifierUNode(WCPattern *pat, NFAUNode *internal, const int minMatch, const int maxMatch) : - NFAQuantifierUNode(pat, internal, minMatch, maxMatch) -{ -} - -int NFALazyQuantifierUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int m = NFAQuantifierUNode::match(str, matcher, curInd); - if (m == -1) - return -1; - - for (int i = min; i < max; ++i) { - int j = next->match(str, matcher, m); - if (j == -1) { - j = inner->match(str, matcher, m); - // if j < m, then j is -1, so we bail. 
- // if j == m, then we would just go and call next->match on the same index, - // but it already failed trying to match right there, so we know we can - // just bail - if (j <= m) return -1; - m = j; - } - else return j; - } - return next->match(str, matcher, m); -} - -// NFAPossessiveQuantifierUNode - -NFAPossessiveQuantifierUNode::NFAPossessiveQuantifierUNode(WCPattern *pat, NFAUNode *internal, const int minMatch, const int maxMatch) : - NFAQuantifierUNode(pat, internal, minMatch, maxMatch) -{ -} - -int NFAPossessiveQuantifierUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int m = NFAQuantifierUNode::match(str, matcher, curInd); - if (m == -1) - return -1; - - for (int i = min; i < max; ++i) { - int j = inner->match(str, matcher, m); - if (j <= m) - return next->match(str, matcher, m); - m = j; - } - return next->match(str, matcher, m); -} - -// NFAAcceptUNode - -NFAAcceptUNode::NFAAcceptUNode() -{ -} - -int NFAAcceptUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (!next) return curInd; - else return next->match(str, matcher, curInd); -} - -// NFAClassUNode - -NFAClassUNode::NFAClassUNode(const bool invert) -{ - inv = invert; -} - -NFAClassUNode::NFAClassUNode(const CMStringW &clazz, const bool invert) -{ - inv = invert; - for (int i = 0; i < clazz.GetLength(); ++i) - vals[clazz[i]] = 1; -} - -int NFAClassUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd < str.GetLength() && ((vals.find(str[curInd]) != vals.end()) ^ inv)) { - return next->match(str, matcher, curInd + 1); - } - return -1; -} - -// NFACIClassUNode - -NFACIClassUNode::NFACIClassUNode(const bool invert) -{ - inv = invert; -} - -NFACIClassUNode::NFACIClassUNode(const CMStringW &clazz, const bool invert) -{ - inv = invert; - for (int i = 0; i < (int)clazz.GetLength(); ++i) - vals[to_lower(clazz[i])] = 1; -} - -int NFACIClassUNode::match(const CMStringW &str, WCMatcher *matcher, const 
int curInd) const -{ - if (curInd < str.GetLength() && ((vals.find(to_lower(str[curInd])) != vals.end()) ^ inv)) - return next->match(str, matcher, curInd + 1); - - return -1; -} - -// NFASubStartUNode - -NFASubStartUNode::NFASubStartUNode() -{ -} - -int NFASubStartUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - return next->match(str, matcher, curInd); -} - -// NFAOrUNode - -NFAOrUNode::NFAOrUNode(NFAUNode *first, NFAUNode *second) : - one(first), two(second) -{ -} - -void NFAOrUNode::findAllNodes(std::map &soFar) -{ - if (one) one->findAllNodes(soFar); - if (two) two->findAllNodes(soFar); - NFAUNode::findAllNodes(soFar); -} -int NFAOrUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int ci = one->match(str, matcher, curInd); - - if (ci != -1) ci = next->match(str, matcher, ci); - if (ci != -1) return ci; - if (ci == -1) ci = two->match(str, matcher, curInd); - if (ci != -1) ci = next->match(str, matcher, ci); - return ci; -} - -// NFAQuoteUNode - -NFAQuoteUNode::NFAQuoteUNode(const CMStringW "ed) : - qStr(quoted) -{ -} - -int NFAQuoteUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd + qStr.GetLength() > str.GetLength()) return -1; - if (str.Mid(curInd, qStr.GetLength()) != qStr) return -1; - return next->match(str, matcher, curInd + qStr.GetLength()); -} - -// NFACIQuoteUNode - -NFACIQuoteUNode::NFACIQuoteUNode(const CMStringW "ed) : - qStr(quoted) -{ -} - -int NFACIQuoteUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd + qStr.GetLength() > str.GetLength()) return -1; - if (str_icmp(str.Mid(curInd, qStr.GetLength()).c_str(), qStr.c_str())) return -1; - return next->match(str, matcher, qStr.GetLength()); -} - -// NFALookAheadUNode - -NFALookAheadUNode::NFALookAheadUNode(NFAUNode *internal, const bool positive) : - NFAUNode(), pos(positive), inner(internal) -{ -} - -void 
NFALookAheadUNode::findAllNodes(std::map &soFar) -{ - if (inner) inner->findAllNodes(soFar); - NFAUNode::findAllNodes(soFar); -} - -int NFALookAheadUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - return ((inner->match(str, matcher, curInd) == -1) ^ pos) ? next->match(str, matcher, curInd) : -1; -} - -// NFALookBehindUNode - -NFALookBehindUNode::NFALookBehindUNode(const CMStringW &str, const bool positive) : - pos(positive), mStr(str) -{ -} - -int NFALookBehindUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (pos) { - if (curInd < mStr.GetLength()) - return -1; - if (str.Mid(curInd - mStr.GetLength(), mStr.GetLength()) == mStr) - return next->match(str, matcher, curInd); - } - else { - if (curInd < mStr.GetLength()) - return next->match(str, matcher, curInd); - if (str.Mid(curInd - mStr.GetLength(), mStr.GetLength()) == mStr) - return -1; - return next->match(str, matcher, curInd); - } - return -1; -} - -// NFAStartOfLineUNode - -NFAStartOfLineUNode::NFAStartOfLineUNode() -{ -} - -int NFAStartOfLineUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd == 0 || str[curInd - 1] == '\n' || str[curInd - 1] == '\r') - return next->match(str, matcher, curInd); - - return -1; -} - -// NFAEndOfLineUNode - -NFAEndOfLineUNode::NFAEndOfLineUNode() -{ -} - -int NFAEndOfLineUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd >= str.GetLength() || str[curInd] == '\n' || str[curInd] == '\r') - return next->match(str, matcher, curInd); - - return -1; -} - -// NFAReferenceUNode - -NFAReferenceUNode::NFAReferenceUNode(const int groupIndex) : - gi(groupIndex) -{ -} - -int NFAReferenceUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int len = matcher->ends[gi] - matcher->starts[gi]; - int ni = -1; - if (gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0) ni = curInd; - else if (curInd + 
len > (int)str.GetLength()) return -1; - else if (str.Mid(curInd, len) != str.Mid(matcher->starts[gi], len)) return -1; - else ni = curInd + len; - - return next->match(str, matcher, ni); -} - -// NFAStartOfInputUNode - -NFAStartOfInputUNode::NFAStartOfInputUNode() -{ -} - -int NFAStartOfInputUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd == 0) return next->match(str, matcher, curInd); - return -1; -} - -// NFAEndOfInputUNode - -NFAEndOfInputUNode::NFAEndOfInputUNode(const bool lookForTerm) : - term(lookForTerm) -{ -} - -int NFAEndOfInputUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int len = str.GetLength(); - if (curInd == len) return next->match(str, matcher, curInd); - else if (term) { - if (curInd == len - 1 && (str[curInd] == '\r' || str[curInd] == '\n')) { - return next->match(str, matcher, curInd); - } - else if (curInd == len - 2 && str.Mid(curInd, 2) == L"\r\n") { - return next->match(str, matcher, curInd); - } - } - return -1; -} - -// NFAWordBoundaryUNode - -NFAWordBoundaryUNode::NFAWordBoundaryUNode(const bool positive) : - pos(positive) -{ -} - -int NFAWordBoundaryUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int len = str.GetLength(); - - wchar_t c1 = (curInd - 1 < len && curInd > 0) ? str[curInd - 1] : '\n'; - wchar_t c2 = (curInd < len) ? 
str[curInd] : '\n'; - - if (curInd == len) return next->match(str, matcher, curInd); - bool ok = is_alpha(c1) != is_alpha(c2); - if (ok && pos) return next->match(str, matcher, curInd); - return -1; -} - -// NFAEndOfMatchUNode - -NFAEndOfMatchUNode::NFAEndOfMatchUNode() -{ -} - -int NFAEndOfMatchUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - if (curInd == matcher->lm) return next->match(str, matcher, curInd); - return -1; -} - -// NFAGroupHeadUNode - -NFAGroupHeadUNode::NFAGroupHeadUNode(const int groupIndex) : - gi(groupIndex) -{ -} - -int NFAGroupHeadUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int ret, o = matcher->starts[gi]; - - matcher->starts[gi] = curInd; - ret = next->match(str, matcher, curInd); - if (ret < 0) matcher->starts[gi] = o; - - return ret; -} - -// NFAGroupTailUNode - -NFAGroupTailUNode::NFAGroupTailUNode(const int groupIndex) : - gi(groupIndex) -{ -} - -int NFAGroupTailUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int ret, o = matcher->ends[gi]; - - matcher->ends[gi] = curInd; - ret = next->match(str, matcher, curInd); - if (ret < 0) matcher->ends[gi] = o; - - return ret; -} - -// NFAGroupLoopPrologueUNode - -NFAGroupLoopPrologueUNode::NFAGroupLoopPrologueUNode(const int groupIndex) : - gi(groupIndex) -{ -} - -int NFAGroupLoopPrologueUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi]; - - matcher->groups[gi] = 0; - matcher->groupPos[gi] = 0; - matcher->groupIndeces[gi] = -1; - ret = next->match(str, matcher, curInd); - if (ret < 0) { - matcher->groups[gi] = o1; - matcher->groupPos[gi] = o2; - matcher->groupIndeces[gi] = o3; - } - - return ret; -} - -// NFAGroupLoopUNode - -NFAGroupLoopUNode::NFAGroupLoopUNode(NFAUNode *internal, const int minMatch, const int maxMatch, - const int groupIndex, const int 
matchType) -{ - inner = internal; - min = minMatch; - max = maxMatch; - gi = groupIndex; - type = matchType; -} - -void NFAGroupLoopUNode::findAllNodes(std::map &soFar) -{ - if (inner) inner->findAllNodes(soFar); - NFAUNode::findAllNodes(soFar); -} - -int NFAGroupLoopUNode::match(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - bool b = (curInd > matcher->groupIndeces[gi]); - - if (b && matcher->groups[gi] < min) { - ++matcher->groups[gi]; - int o = matcher->groupIndeces[gi]; - matcher->groupIndeces[gi] = curInd; - int ret = inner->match(str, matcher, curInd); - if (ret < 0) { - matcher->groupIndeces[gi] = o; - --matcher->groups[gi]; - } - return ret; - } - else if (!b || matcher->groups[gi] >= max) { - return next->match(str, matcher, curInd); - } - else { - switch (type) { - case 0: return matchGreedy(str, matcher, curInd); - case 1: return matchLazy(str, matcher, curInd); - case 2: return matchPossessive(str, matcher, curInd); - } - } - return -1; -} - -int NFAGroupLoopUNode::matchGreedy(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int o = matcher->groupIndeces[gi]; // save our info for backtracking - matcher->groupIndeces[gi] = curInd; // move along - ++matcher->groups[gi]; - int ret = inner->match(str, matcher, curInd); // match internally - if (ret < 0) { // if we failed, then restore info and match next - --matcher->groups[gi]; - matcher->groupIndeces[gi] = o; - ret = next->match(str, matcher, curInd); - } - return ret; -} - -int NFAGroupLoopUNode::matchLazy(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int ret = next->match(str, matcher, curInd); // be lazy, just go on - if (ret < 0) { - int o = matcher->groupIndeces[gi]; // save info for backtracking - matcher->groupIndeces[gi] = curInd; // advance our position - ++matcher->groups[gi]; - ret = inner->match(str, matcher, curInd); // match our internal stuff - if (ret < 0) // if we failed, then restore the info - { - 
--matcher->groups[gi]; - matcher->groupIndeces[gi] = o; - } - } - return ret; -} - -int NFAGroupLoopUNode::matchPossessive(const CMStringW &str, WCMatcher *matcher, const int curInd) const -{ - int o = matcher->groupIndeces[gi]; // save info for backtracking - matcher->groupPos[gi] = matcher->groups[gi]; // set a flag stating we have matcher at least this much - matcher->groupIndeces[gi] = curInd; // move along - ++matcher->groups[gi]; - int ret = inner->match(str, matcher, curInd); // try and match again - if (ret < 0) { // if we fail, back off, but to an extent - --matcher->groups[gi]; - matcher->groupIndeces[gi] = o; - if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd); - } - return ret; -} diff --git a/plugins/SmileyAdd/src/regexp/WCPattern.h b/plugins/SmileyAdd/src/regexp/WCPattern.h deleted file mode 100644 index d45f734645..0000000000 --- a/plugins/SmileyAdd/src/regexp/WCPattern.h +++ /dev/null @@ -1,1654 +0,0 @@ -#ifndef __WCPATTERN_H__ -#define __WCPATTERN_H__ - -class WCMatcher; -class NFAUNode; -class NFAQuantifierUNode; - -/** - This pattern class is very similar in functionality to Java's - java.util.regex.WCPattern class. The pattern class represents an immutable - regular expression object. Instead of having a single object contain both the - regular expression object and the matching object, instead the two objects are - split apart. The {@link WCMatcher WCMatcher} class represents the maching - object. - - The WCPattern class works primarily off of "compiled" patterns. A typical - instantiation of a regular expression looks like: - -

-  WCPattern * p = WCPattern::compile(L"a*b");
-  WCMatcher * m = p->createWCMatcher(L"aaaaaab");
-  if (m->matches()) ...
-  
- - However, if you do not need to use a pattern more than once, it is often - okay to use the WCPattern's static methods instead. An example looks like this: - -
-  if (WCPattern::matches(L"a*b", L"aaaab")) { ... }
-  
- - This class does not currently support unicode. The unicode update for this - class is coming soon. - - This class is partially immutable. It is completely safe to call createWCMatcher - concurrently in different threads, but the other functions (e.g. split) should - not be called concurrently on the same WCPattern. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Construct - - Matches - -
-   -
- Characters -
- x - - The character x -
- \\ - - The character \ -
- \0nn - - The character with octal ASCII value nn -
- \0nnn - - The character with octal ASCII value nnn -
- \xhh - - The character with hexadecimal ASCII value hh -
- \t - - A tab character -
- \r - - A carriage return character -
- \n - - A new-line character -
-   -
- Character Classes -
- [abc] - - Either a, b, or c -
- [^abc] - - Any character but a, b, or c -
- [a-zA-Z] - - Any character ranging from a thru z, or - A thru Z -
- [^a-zA-Z] - - Any character except those ranging from a thru - z, or A thru Z -
- [a\-z] - - Either a, -, or z -
- [a-z[A-Z]] - - Same as [a-zA-Z] -
- [a-z&&[g-i]] - - Any character in the intersection of a-z and - g-i -
- [a-z&&[^g-i]] - - Any character in a-z and not in g-i -
-   -
- Predefined character classes -
- . - - Any character. The DOT_MATCHES_ALL flag must be compiled into the pattern for - . to match a \r or a \n. - Even if DOT_MATCHES_ALL is enabled, . will not - match a \r\n as a single unit, only a \r or a \n. -
- \d - - [0-9] -
- \D - - [^\d] -
- \s - - [ \t\r\n\x0B] -
- \S - - [^\s] -
- \w - - [a-zA-Z0-9_] -
- \W - - [^\w] -
-   -
- POSIX character classes -
- \p{Lower} - - [a-z] -
- \p{Upper} - - [A-Z] -
- \p{ASCII} - - [\x00-\x7F] -
- \p{Alpha} - - [a-zA-Z] -
- \p{Digit} - - [0-9] -
- \p{Alnum} - - [\w&&[^_]] -
- \p{Punct} - - [!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~] -
- \p{XDigit} - - [a-fA-F0-9] -
-   -
- Boundary Matches -
- ^ - - The beginning of a line. Also matches the beginning of input. -
- $ - - The end of a line. Also matches the end of input. -
- \b - - A word boundary -
- \B - - A non word boundary -
- \A - - The beginning of input -
- \G - - The end of the previous match. Ensures that a "next" match will only - happen if it begins with the character immediately following the end of - the "current" match. -
- \Z - - The end of input. Will also match if there is a single trailing - \r\n, a single trailing \r, or a single - trailing \n. -
- \z - - The end of input -
-   -
- Greedy Quantifiers -
- x? - - x, either zero times or one time -
- x* - - x, zero or more times -
- x+ - - x, one or more times -
- x{n} - - x, exactly n times -
- x{n,} - - x, at least n times -
- x{,m} - - x, at most m times -
- x{n,m} - - x, at least n times and at most - m times -
-   -
- Possessive Quantifiers -
- x?+ - - x, either zero times or one time -
- x*+ - - x, zero or more times -
- x++ - - x, one or more times -
- x{n}+ - - x, exactly n times -
- x{n,}+ - - x, at least n times -
- x{,m}+ - - x, at most m times -
- x{n,m}+ - - x, at least n times and at most - m times -
-   -
- Reluctant Quantifiers -
- x?? - - x, either zero times or one time -
- x*? - - x, zero or more times -
- x+? - - x, one or more times -
- x{n}? - - x, exactly n times -
- x{n,}? - - x, at least n times -
- x{,m}? - - x, at most m times -
- x{n,m}? - - x, at least n times and at most - m times -
-   -
- Operators -
- xy - - x then y -
- x|y - - x or y -
- (x) - - x as a capturing group -
-   -
- Quoting -
- \Q - - Nothing, but treat every character (including \s) literally until a - matching \E -
- \E - - Nothing, but ends its matching \Q -
-   -
- Special Constructs -
- (?:x) - - x, but not as a capturing group -
- (?=x) - - x, via positive lookahead. This means that the - expression will match only if it is trailed by x. - It will not "eat" any of the characters matched by - x. -
- (?!x) - - x, via negative lookahead. This means that the - expression will match only if it is not trailed by - x. It will not "eat" any of the characters - matched by x. -
- (?<=x) - - x, via positive lookbehind. x - cannot contain any quantifiers. -
- (?<!x) - - x, via negative lookbehind. x - cannot contain any quantifiers. -
- (?>x) - - x{1}+ -
-   -
- Registered Expression Matching -
- {x} - - The registered pattern x -
- -
- - Begin Text Extracted And Modified From java.util.regex.Pattern documentation - -

Backslashes, escapes, and quoting

- -

The backslash character ('\') serves to introduce escaped - constructs, as defined in the table above, as well as to quote characters - that otherwise would be interpreted as unescaped constructs. Thus the - expression \\ matches a single backslash and \{ matches a - left brace. - -

It is an error to use a backslash prior to any alphabetic character that - does not denote an escaped construct; these are reserved for future - extensions to the regular-expression language. A backslash may be used - prior to a non-alphabetic character regardless of whether that character is - part of an unescaped construct. - -

It is necessary to double backslashes in string literals that represent - regular expressions to protect them from interpretation by a compiler. The - string literal "\b", for example, matches a single backspace - character when interpreted as a regular expression, while - "\\b" matches a word boundary. The string literal - "\(hello\)" is illegal and leads to a compile-time error; - in order to match the string (hello) the string literal - "\\(hello\\)" must be used. - -

Character Classes

- -

Character classes may appear within other character classes, and - may be composed by the union operator (implicit) and the intersection - operator (&&). - The union operator denotes a class that contains every character that is - in at least one of its operand classes. The intersection operator - denotes a class that contains every character that is in both of its - operand classes. - -

The precedence of character-class operators is as follows, from - highest to lowest: - -

- - - - - - - - - - - - - - - - -
1    Literal escape    \x
2    Range    a-z
3    Grouping    [...]
4    Intersection    [a-z&&[aeiou]]
5    Union    [a-e][i-u]
- -

Note that a different set of metacharacters are in effect inside - a character class than outside a character class. For instance, the - regular expression . loses its special meaning inside a - character class, while the expression - becomes a range - forming metacharacter. - - - - -

Groups and capturing

- -

Capturing groups are numbered by counting their opening parentheses from - left to right. In the expression ((A)(B(C))), for example, there - are four such groups:

- -
- - - - - - - - - - -
1    ((A)(B(C)))
2    (A)
3    (B(C))
4    (C)
- -

Group zero always stands for the entire expression. - -

Capturing groups are so named because, during a match, each subsequence - of the input sequence that matches such a group is saved. The captured - subsequence may be used later in the expression, via a back reference, and - may also be retrieved from the matcher once the match operation is complete. - -

The captured input associated with a group is always the subsequence - that the group most recently matched. If a group is evaluated a second time - because of quantification then its previously-captured value, if any, will - be retained if the second evaluation fails. Matching the string - L"aba" against the expression (a(b)?)+, for example, leaves - group two set to L"b". All captured input is discarded at the - beginning of each match. - -

Groups beginning with (? are pure, non-capturing groups - that do not capture text and do not count towards the group total. - - -

WC support

- -

Coming Soon. - -

Comparison to Perl 5

- -

The WCPattern engine performs traditional NFA-based matching - with ordered alternation as occurs in Perl 5. - -

Perl constructs not supported by this class:

- -
    - -
  • The conditional constructs (?{X}) and - (?(condition)X|Y), -

  • - -
  • The embedded code constructs (?{code}) - and (??{code}),

  • - -
  • The embedded comment syntax (?#comment), and

  • - -
  • The preprocessing operations \l \u, - \L, and \U.

  • - -
  • Embedded flags

  • - -
- -

Constructs supported by this class but not by Perl:

- -
    - -
  • Possessive quantifiers, which greedily match as much as they can - and do not back off, even when doing so would allow the overall match to - succeed.

  • - -
  • Character-class union and intersection as described - above.

  • - -
- -

Notable differences from Perl:

- -
    - -
  • In Perl, \1 through \9 are always interpreted - as back references; a backslash-escaped number greater than 9 is - treated as a back reference if at least that many subexpressions exist, - otherwise it is interpreted, if possible, as an octal escape. In this - class octal escapes must always begin with a zero. In this class, - \1 through \9 are always interpreted as back - references, and a larger number is accepted as a back reference if at - least that many subexpressions exist at that point in the regular - expression, otherwise the parser will drop digits until the number is - smaller or equal to the existing number of groups or it is one digit. -

  • - -
  • Perl uses the g flag to request a match that resumes - where the last match left off. This functionality is provided implicitly - by the WCMatcher class: Repeated invocations of the - find method will resume where the last match left off, - unless the matcher is reset.

  • - -
  • Perl is forgiving about malformed matching constructs, as in the - expression *a, as well as dangling brackets, as in the - expression abc], and treats them as literals. This - class is strict and will not compile a pattern when dangling characters - are encountered.

  • - -
- - -

For a more precise description of the behavior of regular expression - constructs, please see - Mastering Regular Expressions, 2nd Edition, Jeffrey E. F. Friedl, - O'Reilly and Associates, 2002. -

-

- - End Text Extracted And Modified From java.util.regex.Pattern documentation - -


- - @author Jeffery Stuart - @since March 2003, Stable Since November 2004 - @version 1.07.00 - @memo A class used to represent "PERL 5"-ish regular expressions - */ -class WCPattern -{ - friend class WCMatcher; - friend class NFAUNode; - friend class NFAQuantifierUNode; -private: - /** - This constructor should not be called directly. Those wishing to use the - WCPattern class should instead use the {@link compile compile} method. - - @param rhs The pattern to compile - @memo Creates a new pattern from the regular expression in rhs. - */ - WCPattern(const CMStringW &rhs); -protected: - /** - This currently is not used, so don't try to do anything with it. - @memo Holds all the compiled patterns for quick access. - */ - static std::map compiledWCPatterns; - /** - Holds all of the registered patterns as strings. Due to certain problems - with compilation of patterns, especially with capturing groups, this seemed - to be the best way to do it. - */ - static std::map > registeredWCPatterns; -protected: - /** - Holds all the NFA nodes used. This makes deletion of a pattern, as well as - clean-up from an unsuccessful compile much easier and faster. - */ - std::map nodes; - /** - Used when methods like split are called. The matcher class uses a lot of - dynamic memeory, so having an instance increases speedup of certain - operations. - */ - WCMatcher * matcher; - /** - The front node of the NFA. - */ - NFAUNode * head; - /** - The actual regular expression we rerpesent - */ - CMStringW pattern; - /** - Flag used during compilation. Once the pattern is successfully compiled, - error is no longer used. - */ - bool error; - /** - Used during compilation to keep track of the current index into - {@link pattern pattern}. Once the pattern is successfully - compiled, error is no longer used. - */ - int curInd; - /** - The number of capture groups this contains. - */ - int groupCount; - /** - The number of non-capture groups this contains. 
- */ - int nonCapGroupCount; - /** - The flags specified when this was compiled. - */ - unsigned long flags; -protected: - /** - Raises an error during compilation. Compilation will cease at that point - and compile will return NULL. - */ - void raiseError(); - /** - Convenience function for registering a node in nodes. - @param node The node to register - @return The registered node - */ - NFAUNode * registerNode(NFAUNode * node); - - /** - Calculates the union of two strings. This function will first sort the - strings and then use a simple selection algorithm to find the union. - @param s1 The first "class" to union - @param s2 The second "class" to union - @return A new string containing all unique characters. Each character - must have appeared in one or both of s1 and - s2. - */ - CMStringW classUnion(CMStringW s1, CMStringW s2) const; - /** - Calculates the intersection of two strings. This function will first sort - the strings and then use a simple selection algorithm to find the - intersection. - @param s1 The first "class" to intersect - @param s2 The second "class" to intersect - @return A new string containing all unique characters. Each character - must have appeared both s1 and s2. - */ - CMStringW classIntersect(CMStringW s1, CMStringW s2) const; - /** - Calculates the negation of a string. The negation is the set of all - characters between \x00 and \xFF not - contained in s1. - @param s1 The "class" to be negated. - @param s2 The second "class" to intersect - @return A new string containing all unique characters. Each character - must have appeared both s1 and s2. - */ - CMStringW classNegate(CMStringW s1) const; - /** - Creates a new "class" representing the range from low thru - hi. This function will wrap if low > - hi. This is a feature, not a buf. Sometimes it is useful - to be able to say [\x70-\x10] instead of [\x70-\x7F\x00-\x10]. 
- @param low The beginning character - @param hi The ending character - @return A new string containing all the characters from low thru hi. - */ - CMStringW classCreateRange(wchar_t low, wchar_t hi) const; - - /** - Extracts a decimal number from the substring of member-variable - {@link pattern pattern} starting at start and - ending at end. - @param start The starting index in {@link pattern pattern} - @param end The last index in {@link pattern pattern} - @return The decimal number in {@link pattern pattern} - */ - int getInt(int start, int end); - /** - Parses a {n,m} string out of the member-variable - {@link pattern pattern} stores the result in sNum - and eNum. - @param sNum Output parameter. The minimum number of matches required - by the curly quantifier are stored here. - @param eNum Output parameter. The maximum number of matches allowed - by the curly quantifier are stored here. - @return Success/Failure. Fails when the curly does not have the proper - syntax - */ - bool quantifyCurly(int & sNum, int & eNum); - /** - Tries to quantify the currently parsed group. If the group being parsed - is indeed quantified in the member-variable - {@link pattern pattern}, then the NFA is modified accordingly. - @param start The starting node of the current group being parsed - @param stop The ending node of the current group being parsed - @param gn The group number of the current group being parsed - @return The node representing the starting node of the group. If the - group becomes quantified, then this node is not necessarily - a GroupHead node. - */ - NFAUNode * quantifyGroup(NFAUNode * start, NFAUNode * stop, const int gn); - - /** - Tries to quantify the last parsed expression. If the character was indeed - quantified, then the NFA is modified accordingly. - @param newNode The recently created expression node - @return The node representing the last parsed expression. 
If the - expression was quantified, return value != newNode - */ - NFAUNode * quantify(NFAUNode * newNode); - /** - Parses the current class being examined in - {@link pattern pattern}. - @return A string of unique characters contained in the current class being - parsed - */ - CMStringW parseClass(); - /** - Parses the current POSIX class being examined in - {@link pattern pattern}. - @return A string of unique characters representing the POSIX class being - parsed - */ - CMStringW parsePosix(); - /** - Returns a string containing the octal character being parsed - @return The string contained the octal value being parsed - */ - CMStringW parseOctal(); - /** - Returns a string containing the hex character being parsed - @return The string contained the hex value being parsed - */ - CMStringW parseHex(); - /** - Returns a new node representing the back reference being parsed - @return The new node representing the back reference being parsed - */ - NFAUNode * parseBackref(); - /** - Parses the escape sequence currently being examined. Determines if the - escape sequence is a class, a single character, or the beginning of a - quotation sequence. - @param inv Output parameter. Whether or not to invert the returned class - @param quo Output parameter. Whether or not this sequence starts a - quotation. - @return The characters represented by the class - */ - CMStringW parseEscape(bool & inv, bool & quo); - /** - Parses a supposed registered pattern currently under compilation. If the - sequence of characters does point to a registered pattern, then the - registered pattern is appended to *end. The registered pattern - is parsed with the current compilation flags. - @param end The ending node of the thus-far compiled pattern - @return The new end node of the current pattern - */ - NFAUNode * parseRegisteredWCPattern(NFAUNode ** end); - /** - Parses a lookbehind expression. Appends the necessary nodes - *end. 
- @param pos Positive or negative look behind - @param end The ending node of the current pattern - @return The new end node of the current pattern - */ - NFAUNode * parseBehind(const bool pos, NFAUNode ** end); - /** - Parses the current expression and tacks on nodes until a \E is found. - @return The end of the current pattern - */ - NFAUNode * parseQuote(); - /** - Parses {@link pattern pattern}. This function is called - recursively when an or (|) or a group is encountered. - @param inParen Are we currently parsing inside a group - @param inOr Are we currently parsing one side of an or (|) - @param end The end of the current expression - @return The starting node of the NFA constructed from this parse - */ - NFAUNode * parse(const bool inParen = 0, const bool inOr = 0, NFAUNode ** end = NULL); -public: - /// We should match regardless of case - const static unsigned long CASE_INSENSITIVE; - /// We are implicitly quoted - const static unsigned long LITERAL; - /// @memo We should treat a . as [\x00-\x7F] - const static unsigned long DOT_MATCHES_ALL; - /** ^ and $ should anchor to the beginning and - ending of lines, not all input - */ - const static unsigned long MULTILINE_MATCHING; - /** When enabled, only instances of \n are recognized as - line terminators - */ - const static unsigned long UNIX_LINE_MODE; - /// The absolute minimum number of matches a quantifier can match (0) - const static int MIN_QMATCH; - /// The absolute maximum number of matches a quantifier can match (0x7FFFFFFF) - const static int MAX_QMATCH; -public: - /** - Call this function to compile a regular expression into a - WCPattern object. Special values can be assigned to - mode when certain non-standard behaviors are expected from - the WCPattern object. - @param pattern The regular expression to compile - @param mode A bitwise or of flags signalling what special behaviors are - wanted from this WCPattern object - @return If successful, compile returns a WCPattern - pointer. 
Upon failure, compile returns - NULL - */ - static WCPattern * compile(const CMStringW & pattern, - const unsigned long mode = 0); - /** - Dont use this function. This function will compile a pattern, and cache - the result. This will eventually be used as an optimization when people - just want to call static methods using the same pattern over and over - instead of first compiling the pattern and then using the compiled - instance for matching. - @param pattern The regular expression to compile - @param mode A bitwise or of flags signalling what special behaviors are - wanted from this WCPattern object - @return If successful, compileAndKeep returns a - WCPattern pointer. Upon failure, compile - returns NULL. - */ - static WCPattern * compileAndKeep(const CMStringW & pattern, - const unsigned long mode = 0); - - /** - Searches through replace and replaces all substrings matched - by pattern with str. str may - contain backreferences (e.g. \1) to capture groups. A typical - invocation looks like: -

- - WCPattern::replace(L"(a+)b(c+)", L"abcccbbabcbabc", L"\\2b\\1"); - -

- which would replace abcccbbabcbabc with - cccbabbcbabcba. - @param pattern The regular expression - @param str The replacement text - @param replacementText The string in which to perform replacements - @param mode The special mode requested of the WCPattern - during the replacement process - @return The text with the replacement string substituted where necessary - */ - static CMStringW replace(const CMStringW & pattern, - const CMStringW & str, - const CMStringW & replacementText, - const unsigned long mode = 0); - - /** - Splits the specified string over occurrences of the specified pattern. - Empty strings can be optionally ignored. The number of strings returned is - configurable. A typical invocation looks like: -

- - CMStringW str(strSize, 0);
- FILE * fp = fopen(fileName, "r");
- fread((char*)str.data(), strSize * 2, 1, fp);
- fclose(fp);
-
- std::vector<CMStringW> lines = WCPattern::split(L"[\r\n]+", str, true);
-
-
- - @param pattern The regular expression - @param replace The string to split - @param keepEmptys Whether or not to keep empty strings - @param limit The maximum number of splits to make - @param mode The special mode requested of the WCPattern - during the split process - @return All substrings of str split across pattern. - */ - static std::vector split(const CMStringW & pattern, - const CMStringW & str, - const bool keepEmptys = 0, - const unsigned long limit = 0, - const unsigned long mode = 0); - - /** - Finds all the instances of the specified pattern within the string. You - should be careful to only pass patterns with a minimum length of one. For - example, the pattern a* can be matched by an empty string, so - instead you should pass a+ since at least one character must - be matched. A typical invocation of findAll looks like: -

- - std::vector<CMStringW> numbers = WCPattern::findAll(L"\\d+", string); - -

- - @param pattern The pattern for which to search - @param str The string to search - @param mode The special mode requested of the WCPattern - during the find process - @return All instances of pattern in str - */ - static std::vector findAll(const CMStringW & pattern, - const CMStringW & str, - const unsigned long mode = 0); - - /** - Determines if an entire string matches the specified pattern - - @param pattern The pattern for to match - @param str The string to match - @param mode The special mode requested of the WCPattern - during the replacement process - @return True if str is recognized by pattern - */ - static bool matches(const CMStringW & pattern, - const CMStringW & str, - const unsigned long mode = 0); - - /** - Registers a pattern under a specific name for use in later compilations. - A typical invocation and later use looks like: -

- - WCPattern::registerWCPattern(L"ip", L"(?:\\d{1,3}\\.){3}\\d{1,3}");
- WCPattern * p1 = WCPattern::compile(L"{ip}:\\d+");
- WCPattern * p2 = WCPattern::compile(L"Connection from ({ip}) on port \\d+");
-
-

- Multiple calls to registerWCPattern with the same - name will result in the pattern getting overwritten. - - @param name The name to give to the pattern - @param pattern The pattern to register - @param mode Any special flags to use when compiling pattern - @return Success/Failure. Fails only if pattern has invalid - syntax - */ - static bool registerWCPattern(const CMStringW & name, - const CMStringW & pattern, - const unsigned long mode = 0); - - /** - Clears the pattern registry - */ - static void unregisterWCPatterns(); - /** - Don't use - */ - static void clearWCPatternCache(); - - /** - Searches through a string for the nth match of the - given pattern in the string. Match indeces start at zero, not one. - A typical invocation looks like this: -

- - std::pair<CMStringW, int> match = WCPattern::findNthMatch(L"\\d{1,3}", L"192.168.1.101:22", 1);
- wprintf(L"%s %i\n", match.first.c_str(), match.second);
-
- Output: 168 4
-
- - @param pattern The pattern for which to search - @param str The string to search - @param matchNum Which match to find - @param mode Any special flags to use during the matching process - @return A string and an integer. The string is the string matched. The - integer is the starting location of the matched string in - str. You can check for success/failure by making sure - that the integer returned is greater than or equal to zero. - */ - static std::pair findNthMatch(const CMStringW & pattern, - const CMStringW & str, - const int matchNum, - const unsigned long mode = 0); -public: - /** - Deletes all NFA nodes allocated during compilation - */ - ~WCPattern(); - - CMStringW replace(const CMStringW & str, - const CMStringW & replacementText); - std::vector split(const CMStringW & str, const bool keepEmptys = 0, - const unsigned long limit = 0); - std::vector findAll(const CMStringW & str); - bool matches(const CMStringW & str); - /** - Returns the flags used during compilation of this pattern - @return The flags used during compilation of this pattern - */ - unsigned long getFlags() const; - /** - Returns the regular expression this pattern represents - @return The regular expression this pattern represents - */ - CMStringW getWCPattern() const; - /** - Creates a matcher object using the specified string and this pattern. 
- @param str The string to match against - @return A new matcher using object using this pattern and the specified - string - */ - WCMatcher * createWCMatcher(const CMStringW & str); -}; - -class NFAUNode -{ - friend class WCMatcher; -public: - NFAUNode * next; - NFAUNode(); - virtual ~NFAUNode(); - virtual void findAllNodes(std::map & soFar); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const = 0; - inline virtual bool isGroupHeadNode() const { return false; } - inline virtual bool isStartOfInputNode() const { return false; } -}; -class NFACharUNode : public NFAUNode -{ -protected: - wchar_t ch; -public: - NFACharUNode(const wchar_t c); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFACICharUNode : public NFAUNode -{ -protected: - wchar_t ch; -public: - NFACICharUNode(const wchar_t c); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAStartUNode : public NFAUNode -{ -public: - NFAStartUNode(); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAEndUNode : public NFAUNode -{ -public: - NFAEndUNode(); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAQuantifierUNode : public NFAUNode -{ -public: - int min, max; - NFAUNode * inner; - virtual void findAllNodes(std::map & soFar); - NFAQuantifierUNode(WCPattern * pat, NFAUNode * internal, - const int minMatch = WCPattern::MIN_QMATCH, - const int maxMatch = WCPattern::MAX_QMATCH); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAGreedyQuantifierUNode : public NFAQuantifierUNode -{ -public: - NFAGreedyQuantifierUNode(WCPattern * pat, NFAUNode * internal, - const int minMatch = WCPattern::MIN_QMATCH, - const int maxMatch = WCPattern::MAX_QMATCH); - virtual int match(const CMStringW & str, WCMatcher * 
matcher, const int curInd = 0) const; - virtual int matchInternal(const CMStringW & str, WCMatcher * matcher, const int curInd, const int soFar) const; -}; -class NFALazyQuantifierUNode : public NFAQuantifierUNode -{ -public: - NFALazyQuantifierUNode(WCPattern * pat, NFAUNode * internal, - const int minMatch = WCPattern::MIN_QMATCH, - const int maxMatch = WCPattern::MAX_QMATCH); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAPossessiveQuantifierUNode : public NFAQuantifierUNode -{ -public: - NFAPossessiveQuantifierUNode(WCPattern * pat, NFAUNode * internal, - const int minMatch = WCPattern::MIN_QMATCH, - const int maxMatch = WCPattern::MAX_QMATCH); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAAcceptUNode : public NFAUNode -{ -public: - NFAAcceptUNode(); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAClassUNode : public NFAUNode -{ -public: - bool inv; - std::map vals; - NFAClassUNode(const bool invert = 0); - NFAClassUNode(const CMStringW & clazz, const bool invert); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFACIClassUNode : public NFAUNode -{ -public: - bool inv; - std::map vals; - NFACIClassUNode(const bool invert = 0); - NFACIClassUNode(const CMStringW & clazz, const bool invert); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFASubStartUNode : public NFAUNode -{ -public: - NFASubStartUNode(); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAOrUNode : public NFAUNode -{ -public: - NFAUNode * one; - NFAUNode * two; - NFAOrUNode(NFAUNode * first, NFAUNode * second); - virtual void findAllNodes(std::map & soFar); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; 
-class NFAQuoteUNode : public NFAUNode -{ -public: - CMStringW qStr; - NFAQuoteUNode(const CMStringW & quoted); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFACIQuoteUNode : public NFAUNode -{ -public: - CMStringW qStr; - NFACIQuoteUNode(const CMStringW & quoted); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFALookAheadUNode : public NFAUNode -{ -public: - bool pos; - NFAUNode * inner; - NFALookAheadUNode(NFAUNode * internal, const bool positive); - virtual void findAllNodes(std::map & soFar); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFALookBehindUNode : public NFAUNode -{ -public: - bool pos; - CMStringW mStr; - NFALookBehindUNode(const CMStringW & str, const bool positive); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAStartOfLineUNode : public NFAUNode -{ -public: - NFAStartOfLineUNode(); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAEndOfLineUNode : public NFAUNode -{ -public: - NFAEndOfLineUNode(); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAReferenceUNode : public NFAUNode -{ -public: - int gi; - NFAReferenceUNode(const int groupIndex); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAStartOfInputUNode : public NFAUNode -{ -public: - NFAStartOfInputUNode(); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; - inline virtual bool isStartOfInputNode() const { return false; } -}; -class NFAEndOfInputUNode : public NFAUNode -{ -public: - bool term; - NFAEndOfInputUNode(const bool lookForTerm); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class 
NFAWordBoundaryUNode : public NFAUNode -{ -public: - bool pos; - NFAWordBoundaryUNode(const bool positive); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAEndOfMatchUNode : public NFAUNode -{ -public: - NFAEndOfMatchUNode(); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAGroupHeadUNode : public NFAUNode -{ -public: - int gi; - NFAGroupHeadUNode(const int groupIndex); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; - inline virtual bool isGroupHeadNode() const { return false; } -}; -class NFAGroupTailUNode : public NFAUNode -{ -public: - int gi; - NFAGroupTailUNode(const int groupIndex); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAGroupLoopPrologueUNode : public NFAUNode -{ -public: - int gi; - NFAGroupLoopPrologueUNode(const int groupIndex); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; -class NFAGroupLoopUNode : public NFAUNode -{ -public: - int gi, min, max, type; - NFAUNode * inner; - NFAGroupLoopUNode(NFAUNode * internal, const int minMatch, - const int maxMatch, const int groupIndex, const int matchType); - virtual void findAllNodes(std::map & soFar); - virtual int match(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; - int matchGreedy(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; - int matchLazy(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; - int matchPossessive(const CMStringW & str, WCMatcher * matcher, const int curInd = 0) const; -}; - -#endif - diff --git a/plugins/SmileyAdd/src/smileys.cpp b/plugins/SmileyAdd/src/smileys.cpp index f66a6e374e..3839c51951 100644 --- a/plugins/SmileyAdd/src/smileys.cpp +++ b/plugins/SmileyAdd/src/smileys.cpp @@ -222,51 +222,42 @@ static DWORD_PTR 
ConvertServiceParam(MCONTACT hContact, const wchar_t *param) void SmileyType::CallSmileyService(MCONTACT hContact) { - _TPattern *srvsplit = _TPattern::compile(L"(.*)\\|(.*)\\|(.*)"); - _TMatcher *m0 = srvsplit->createWCMatcher(GetTriggerText()); - m0->findFirstMatch(); + MRegexp16 srvsplit(L"(.*)\\|(.*)\\|(.*)"); + srvsplit.match(m_TriggerText); - CMStringW name = m0->getGroup(1); - CMStringW par1 = m0->getGroup(2); - CMStringW par2 = m0->getGroup(3); + CMStringW name = srvsplit.getGroup(1); + CMStringW par1 = srvsplit.getGroup(2); + CMStringW par2 = srvsplit.getGroup(3); - delete m0; - delete srvsplit; - - char str[MAXMODULELABELLENGTH]; const char *proto = ""; - if (name[0] == '/') { proto = (const char*)GetContactProto(hContact); - if (proto == nullptr) return; + if (proto == nullptr) + return; } + + char str[MAXMODULELABELLENGTH]; mir_snprintf(str, "%s%s", proto, _T2A(name.c_str())); CallService(str, ConvertServiceParam(hContact, par1.c_str()), ConvertServiceParam(hContact, par2.c_str())); } - - SmileyPackType::~SmileyPackType() { if (m_hSmList != nullptr) ImageList_Destroy(m_hSmList); } -static const wchar_t urlRegEx[] = -L"(?:ftp|https|http|file|aim|webcal|irc|msnim|xmpp|gopher|mailto|news|nntp|telnet|wais|prospero)://?[\\w.?%:/$+;]*"; +static const wchar_t urlRegEx[] = L"(?:ftp|https|http|file|aim|webcal|irc|msnim|xmpp|gopher|mailto|news|nntp|telnet|wais|prospero)://?[\\w.?%:/$+;]*"; static const wchar_t pathRegEx[] = L"[\\s\"][a-zA-Z]:[\\\\/][\\w.\\-\\\\/]*"; static const wchar_t timeRegEx[] = L"\\d{1,2}:\\d{2}:\\d{2}|\\d{1,2}:\\d{2}"; void SmileyPackType::AddTriggersToSmileyLookup(void) { - _TPattern *p = _TPattern::compile(L"\\s+"); - { - CMStringW emptystr; - m_SmileyLookup.insert(new SmileyLookup(urlRegEx, true, -1, emptystr)); - m_SmileyLookup.insert(new SmileyLookup(pathRegEx, true, -1, emptystr)); - m_SmileyLookup.insert(new SmileyLookup(timeRegEx, true, -1, emptystr)); - } + CMStringW emptystr; + m_SmileyLookup.insert(new SmileyLookup(urlRegEx, 
true, -1, emptystr)); + m_SmileyLookup.insert(new SmileyLookup(pathRegEx, true, -1, emptystr)); + m_SmileyLookup.insert(new SmileyLookup(timeRegEx, true, -1, emptystr)); for (int dist = 0; dist < m_SmileyList.getCount(); dist++) { if (m_SmileyList[dist].IsRegEx()) { @@ -280,50 +271,34 @@ void SmileyPackType::AddTriggersToSmileyLookup(void) } else if (!m_SmileyList[dist].IsService()) { bool first = true; - int li = 0; - _TMatcher *m0 = p->createWCMatcher(m_SmileyList[dist].GetTriggerText()); - while (m0->findNextMatch()) { - int stind = m0->getStartingIndex(); - if (li != stind) { - CMStringW out; - ReplaceAllSpecials(m0->getString().Mid(li, stind - li), out); - SmileyLookup *dats = new SmileyLookup(out, false, dist, GetFilename()); - if (dats->IsValid()) { - m_SmileyLookup.insert(dats); - if (first) { - m_SmileyList[dist].m_InsertText = out; - first = false; - } - } // fallthrough - } - li = m0->getEndingIndex(); - } - - int stind = (int)m0->getString().GetLength(); - if (li < stind) { - CMStringW out; - ReplaceAllSpecials(m0->getString().Mid(li, stind - li), out); - SmileyLookup *dats = new SmileyLookup(out, false, dist, GetFilename()); + const CMStringW &text = m_SmileyList[dist].GetTriggerText(); + int iStart = 0; + while (true) { + CMStringW wszWord = text.Tokenize(L" \t", iStart); + if (iStart == -1) + break; + + ReplaceAllSpecials(wszWord, wszWord); + SmileyLookup *dats = new SmileyLookup(wszWord, false, dist, GetFilename()); if (dats->IsValid()) { m_SmileyLookup.insert(dats); if (first) { - m_SmileyList[dist].m_InsertText = out; + m_SmileyList[dist].m_InsertText = wszWord; first = false; } } - else - delete dats; + else delete dats; } - delete m0; } } - delete p; } void SmileyPackType::ReplaceAllSpecials(const CMStringW &Input, CMStringW &Output) { - Output = _TPattern::replace(L"%%_{1,2}%%", Input, L" "); - Output = _TPattern::replace(L"%%''%%", Output, L"\""); + Output = Input; + Output.Replace(L"%%_%%", L" "); + Output.Replace(L"%%__%%", L" "); + 
Output.Replace(L"%%''%%", L"\""); } void SmileyPackType::Clear(void) @@ -393,11 +368,7 @@ bool SmileyPackType::LoadSmileyFile(const CMStringW &filename, const CMStringW & delete[] buf; - bool res; - if (filename.Find(L".xep") == -1) - res = LoadSmileyFileMSL(tbuf, onlyInfo, modpath); - else - res = LoadSmileyFileXEP(tbuf, onlyInfo, modpath); + bool res = LoadSmileyFileMSL(tbuf, onlyInfo, modpath); if (errorFound) ReportError(TranslateT("There were problems loading smiley pack (it should be corrected).\nSee network log for details.")); @@ -409,305 +380,140 @@ bool SmileyPackType::LoadSmileyFileMSL(CMStringW &tbuf, bool onlyInfo, CMStringW { CMStringW pathstr, packstr; { - _TPattern *pathsplit = _TPattern::compile(L"(.*\\\\)(.*)\\.|$"); - _TMatcher *m0 = pathsplit->createWCMatcher(modpath); - m0->findFirstMatch(); - pathstr = m0->getGroup(1); - packstr = m0->getGroup(2); - delete m0; - delete pathsplit; - } - { - _TPattern *otherf = _TPattern::compile( - L"^\\s*(Name|Author|Date|Version|ButtonSmiley)\\s*=\\s*\"(.*)\"", - _TPattern::MULTILINE_MATCHING); - - _TMatcher *m0 = otherf->createWCMatcher(tbuf); - while (m0->findNextMatch()) { - if (m0->getGroup(1) == L"Name") m_Name = m0->getGroup(2); - if (m0->getGroup(1) == L"Author") m_Author = m0->getGroup(2); - if (m0->getGroup(1) == L"Date") m_Date = m0->getGroup(2); - if (m0->getGroup(1) == L"Version") m_Version = m0->getGroup(2); - if (m0->getGroup(1) == L"ButtonSmiley") m_ButtonSmiley = m0->getGroup(2); - } - delete m0; - delete otherf; + MRegexp16 pathsplit(L"(.*\\\\)(.*)\\.|$"); + pathsplit.match(modpath); + + pathstr = pathsplit.getGroup(1); + packstr = pathsplit.getGroup(2); } - if (!onlyInfo) { + if (!onlyInfo) selec.x = selec.y = win.x = win.y = 0; - { - _TPattern *pat = _TPattern::compile( - L"^\\s*(Selection|Window)Size\\s*=\\s*(\\d+)\\s*,\\s*(\\d+)", - _TPattern::MULTILINE_MATCHING); - _TMatcher *m0 = pat->createWCMatcher(tbuf); - while (m0->findNextMatch()) { - POINT tpt; - tpt.x = 
_wtol(m0->getGroup(2).c_str()); - tpt.y = _wtol(m0->getGroup(3).c_str()); - - if (m0->getGroup(1) == L"Selection") - selec = tpt; - else if (m0->getGroup(1) == L"Window") - win = tpt; - } - delete m0; - delete pat; - } - - _TPattern *smiley = _TPattern::compile( - L"^\\s*Smiley(\\*)?\\s*=" // Is Hidden - L"(?:\\s*\"(.*)\")" // Smiley file name - L"(?:[\\s,]+(\\-?\\d+))" // Icon resource id - L"(?:[\\s,]+(R|S)?\"(.*?)\")" // Trigger text - L"(?:[\\s,]+\"(.*?)\")?" // Tooltip or insert text - L"(?:[\\s,]+\"(.*?)\")?", // Tooltip text - _TPattern::MULTILINE_MATCHING); - - SmileyVectorType hiddenSmileys; - unsigned smnum = 0; - { - _TMatcher *m0 = smiley->createWCMatcher(tbuf); - while (m0->findNextMatch()) { - CMStringW resname = m0->getGroup(2); - if (resname.Find(L"http://") != -1) { - if (GetSmileyFile(resname, packstr)) - continue; - } - else if (!resname.IsEmpty()) - resname.Insert(0, pathstr); - SmileyType *dat = new SmileyType; - - const int iconIndex = _wtol(m0->getGroup(3).c_str()); - - dat->SetHidden(m0->getStartingIndex(1) >= 0); - if (m0->getStartingIndex(4) >= 0) { - dat->SetRegEx(m0->getGroup(4) == L"R"); - dat->SetService(m0->getGroup(4) == L"S"); - } - dat->m_TriggerText = m0->getGroup(5); - if (dat->IsRegEx()) { - if (m0->getStartingIndex(6) >= 0) - ReplaceAllSpecials(m0->getGroup(6), dat->m_InsertText); - - if (m0->getStartingIndex(7) >= 0) - ReplaceAllSpecials(m0->getGroup(7), dat->m_ToolText); - else - dat->m_ToolText = dat->m_InsertText; - } - else { - if (m0->getStartingIndex(6) >= 0) - ReplaceAllSpecials(m0->getGroup(6), dat->m_ToolText); - else - ReplaceAllSpecials(dat->m_TriggerText, dat->m_ToolText); - } + int iStart = 0; + MRegexp16 otherf(L"^\\s*(Name|Author|Date|Version|ButtonSmiley)\\s*=\\s*\"(.*)\""); + MRegexp16 size(L"^\\s*(Selection|Window)Size\\s*=\\s*(\\d+)\\s*,\\s*(\\d+)"); + MRegexp16 smiley( + L"^\\s*Smiley(\\*)?\\s*=" // Is Hidden + L"(?:\\s*\"(.*)\")" // Smiley file name + L"(?:[\\s,]+(\\-?\\d+))" // Icon resource id + 
L"(?:[\\s,]+(R|S)?\"(.*?)\")" // Trigger text + L"(?:[\\s,]+\"(.*?)\")?" // Tooltip or insert text + L"(?:[\\s,]+\"(.*?)\")?"); // Tooltip text + + SmileyVectorType hiddenSmileys; + unsigned smnum = 0; + + while (true) { + CMStringW line = tbuf.Tokenize(L"\r\n", iStart); + if (iStart == -1) + break; - bool noerr; - if (resname.IsEmpty()) { - dat->SetHidden(true); - dat->SetText(true); - noerr = true; - } - else noerr = dat->LoadFromResource(resname, iconIndex); + if (line.IsEmpty() || line[0] == ';') + continue; - if (dat->IsHidden()) - hiddenSmileys.insert(dat); - else - m_SmileyList.insert(dat); - - if (!noerr) { - static const wchar_t errmsg[] = LPGENW("Smiley #%u in file %s for smiley pack %s not found."); - wchar_t msgtxt[1024]; - mir_snwprintf(msgtxt, TranslateW(errmsg), smnum, resname.c_str(), modpath.c_str()); - Netlib_LogW(hNetlibUser, msgtxt); - errorFound = true; - } - smnum++; - } - delete smiley; - delete m0; + if (otherf.match(line) >= 0) { + CMStringW key(otherf.getGroup(1)), value(otherf.getGroup(2)); + if (key == L"Name") + m_Name = value; + else if (key == L"Author") + m_Author = value; + else if (key == L"Date") + m_Date = value; + else if (key == L"Version") + m_Version = value; + else if (key == L"ButtonSmiley") + m_ButtonSmiley = value; + continue; } - m_VisibleCount = m_SmileyList.getCount(); - - m_SmileyList.splice(hiddenSmileys); - - AddTriggersToSmileyLookup(); - } - - return true; -} - - -static void DecodeHTML(CMStringW &str) -{ - if (str.Find('&') != -1) { - str = _TPattern::replace(CMStringW(L"<"), str, CMStringW(L"<")); - str = _TPattern::replace(CMStringW(L">"), str, CMStringW(L">")); - } -} - - -static IStream* DecodeBase64Data(const char *pData) -{ - unsigned dataLen; - ptrA data((char*)mir_base64_decode(pData, &dataLen)); - if (data == NULL) - return nullptr; - - // Read image list - HGLOBAL hBuffer = GlobalAlloc(GMEM_MOVEABLE, dataLen); - if (!hBuffer) - return nullptr; - - void *dst = GlobalLock(hBuffer); - memcpy(dst, data, 
dataLen); - GlobalUnlock(hBuffer); - - IStream *pStream = nullptr; - CreateStreamOnHGlobal(hBuffer, TRUE, &pStream); - return pStream; -} + if (onlyInfo) + continue; + if (size.match(line) >= 0) { + POINT tpt; + tpt.x = _wtol(size.getGroup(2)); + tpt.y = _wtol(size.getGroup(3)); -bool SmileyPackType::LoadSmileyFileXEP(CMStringW &tbuf, bool onlyInfo, CMStringW&) -{ - _TMatcher *m0, *m1, *m2; - - _TPattern *dbname_re = _TPattern::compile(L"\\s*\"(.*?)\"\\s*", - _TPattern::MULTILINE_MATCHING); - _TPattern *author_re = _TPattern::compile(L"\\s*\"(.*?)\"\\s*", - _TPattern::MULTILINE_MATCHING); - _TPattern *settings_re = _TPattern::compile(L"(.*?)", - _TPattern::MULTILINE_MATCHING | _TPattern::DOT_MATCHES_ALL); - - m0 = settings_re->createWCMatcher(tbuf); - if (m0->findFirstMatch()) { - CMStringW settings = m0->getGroup(1); - - m1 = author_re->createWCMatcher(settings); - if (m1->findFirstMatch()) { - m_Author = m1->getGroup(1); - DecodeHTML(m_Author); + if (size.getGroup(1) == L"Selection") + selec = tpt; + else if (size.getGroup(1) == L"Window") + win = tpt; + continue; } - delete m1; - m1 = dbname_re->createWCMatcher(settings); - if (m1->findFirstMatch()) { - m_Name = m1->getGroup(1); - DecodeHTML(m_Name); - } - delete m1; - } - delete m0; - - delete dbname_re; - delete author_re; - delete settings_re; - - if (!onlyInfo) { - _TPattern *record_re = _TPattern::compile(L"(?:\\s*\"(.*?)\"?(.*?))", - _TPattern::MULTILINE_MATCHING | _TPattern::DOT_MATCHES_ALL); - _TPattern *expression_re = _TPattern::compile(L"\\s*\"(.*?)\"\\s*", - _TPattern::MULTILINE_MATCHING); - _TPattern *pastetext_re = _TPattern::compile(L"\\s*\"(.*?)\"\\s*", - _TPattern::MULTILINE_MATCHING); - _TPattern *images_re = _TPattern::compile(L"(.*?)", - _TPattern::MULTILINE_MATCHING | _TPattern::DOT_MATCHES_ALL); - _TPattern *image_re = _TPattern::compile(L"(.*?)", - _TPattern::MULTILINE_MATCHING | _TPattern::DOT_MATCHES_ALL); - _TPattern *imagedt_re = _TPattern::compile(L"", - 
_TPattern::MULTILINE_MATCHING); - - m0 = images_re->createWCMatcher(tbuf); - if (m0->findFirstMatch()) { - CMStringW images = m0->getGroup(1); - - m1 = imagedt_re->createWCMatcher(images); - if (m1->findFirstMatch()) { - IStream *pStream = DecodeBase64Data(_T2A(m1->getGroup(1).c_str())); - if (pStream != nullptr) { - if (m_hSmList != nullptr) ImageList_Destroy(m_hSmList); - m_hSmList = ImageList_Read(pStream); - pStream->Release(); - } + if (smiley.match(line)) { + CMStringW resname = smiley.getGroup(2); + if (resname.Find(L"http://") != -1) { + if (GetSmileyFile(resname, packstr)) + continue; } - delete m1; - } - delete m0; + else if (!resname.IsEmpty()) + resname.Insert(0, pathstr); - m0 = record_re->createWCMatcher(tbuf); - while (m0->findNextMatch()) { SmileyType *dat = new SmileyType; - dat->SetRegEx(true); - dat->SetImList(m_hSmList, _wtol(m0->getGroup(1).c_str())); - dat->m_ToolText = m0->getGroup(2); - DecodeHTML(dat->m_ToolText); + const int iconIndex = _wtol(smiley.getGroup(3)); - CMStringW rec = m0->getGroup(3); + dat->SetHidden(!smiley.getGroup(1).IsEmpty()); - m1 = expression_re->createWCMatcher(rec); - if (m1->findFirstMatch()) { - dat->m_TriggerText = m1->getGroup(1); - DecodeHTML(dat->m_TriggerText); + CMStringW wszGrp4(smiley.getGroup(4)); + if (!wszGrp4.IsEmpty()) { + dat->SetRegEx(wszGrp4 == L"R"); + dat->SetService(wszGrp4 == L"S"); } - delete m1; - m1 = pastetext_re->createWCMatcher(rec); - if (m1->findFirstMatch()) { - dat->m_InsertText = m1->getGroup(1); - DecodeHTML(dat->m_InsertText); + dat->m_TriggerText = smiley.getGroup(5); + + CMStringW wszGrp6(smiley.getGroup(6)), wszGrp7(smiley.getGroup(7)); + if (dat->IsRegEx()) { + if (!wszGrp6.IsEmpty()) + ReplaceAllSpecials(wszGrp6, dat->m_InsertText); + + if (!wszGrp7.IsEmpty()) + ReplaceAllSpecials(wszGrp7, dat->m_ToolText); + else + dat->m_ToolText = dat->m_InsertText; } - delete m1; - dat->SetHidden(dat->m_InsertText.IsEmpty()); - - m1 = image_re->createWCMatcher(rec); - if 
(m1->findFirstMatch()) { - CMStringW images = m1->getGroup(1); - - m2 = imagedt_re->createWCMatcher(images); - if (m2->findFirstMatch()) { - IStream *pStream = DecodeBase64Data(_T2A(m2->getGroup(1).c_str())); - if (pStream != nullptr) { - dat->LoadFromImage(pStream); - pStream->Release(); - } - } - delete m2; + else { + if (!wszGrp6.IsEmpty()) + ReplaceAllSpecials(wszGrp6, dat->m_ToolText); + else + ReplaceAllSpecials(dat->m_TriggerText, dat->m_ToolText); + } + + bool noerr; + if (resname.IsEmpty()) { + dat->SetHidden(true); + dat->SetText(true); + noerr = true; } - delete m1; + else noerr = dat->LoadFromResource(resname, iconIndex); - m_SmileyList.insert(dat); + if (dat->IsHidden()) + hiddenSmileys.insert(dat); + else + m_SmileyList.insert(dat); + + if (!noerr) { + static const wchar_t errmsg[] = LPGENW("Smiley #%u in file %s for smiley pack %s not found."); + wchar_t msgtxt[1024]; + mir_snwprintf(msgtxt, TranslateW(errmsg), smnum, resname.c_str(), modpath.c_str()); + Netlib_LogW(hNetlibUser, msgtxt); + errorFound = true; + } + smnum++; } - delete m0; - - delete record_re; - delete expression_re; - delete pastetext_re; - delete images_re; - delete image_re; - delete imagedt_re; } m_VisibleCount = m_SmileyList.getCount(); - + m_SmileyList.splice(hiddenSmileys); AddTriggersToSmileyLookup(); - - selec.x = 0; - selec.y = 0; - win.x = 0; - win.y = 0; - return true; } - -// +///////////////////////////////////////////////////////////////////////////////////////// // SmileyPackListType -// - bool SmileyPackListType::AddSmileyPack(CMStringW &filename, CMStringW &packname) { @@ -724,7 +530,6 @@ bool SmileyPackListType::AddSmileyPack(CMStringW &filename, CMStringW &packname) return res; } - SmileyPackType* SmileyPackListType::GetSmileyPack(CMStringW &filename) { CMStringW modpath; @@ -743,11 +548,8 @@ void SmileyPackListType::ClearAndFreeAll() m_SmileyPacks.destroy(); } - -// +///////////////////////////////////////////////////////////////////////////////////////// // 
SmileyCategoryType -// - SmileyCategoryType::SmileyCategoryType(SmileyPackListType *pSPS, const CMStringW &name, const CMStringW &displayName, const CMStringW &defaultFilename, SmcType typ) @@ -775,21 +577,18 @@ void SmileyCategoryType::Load(void) } } - SmileyPackType* SmileyCategoryType::GetSmileyPack(void) { return m_pSmileyPackStore->GetSmileyPack(m_Filename); } - void SmileyCategoryType::SaveSettings(void) { opt.WritePackFileName(m_Filename, m_Name); } -// +///////////////////////////////////////////////////////////////////////////////////////// // SmileyCategoryListType -// void SmileyCategoryListType::ClearAndLoadAll(void) { @@ -799,7 +598,6 @@ void SmileyCategoryListType::ClearAndLoadAll(void) m_SmileyCategories[i].Load(); } - SmileyCategoryType* SmileyCategoryListType::GetSmileyCategory(const CMStringW &name) { for (int i = 0; i < m_SmileyCategories.getCount(); i++) @@ -809,20 +607,17 @@ SmileyCategoryType* SmileyCategoryListType::GetSmileyCategory(const CMStringW &n return nullptr; } - SmileyCategoryType* SmileyCategoryListType::GetSmileyCategory(unsigned index) { return index < (unsigned)m_SmileyCategories.getCount() ? &m_SmileyCategories[index] : nullptr; } - SmileyPackType* SmileyCategoryListType::GetSmileyPack(CMStringW &categoryname) { SmileyCategoryType *smc = GetSmileyCategory(categoryname); return smc != nullptr ? 
smc->GetSmileyPack() : nullptr; } - void SmileyCategoryListType::SaveSettings(void) { CMStringW catstr; @@ -836,7 +631,6 @@ void SmileyCategoryListType::SaveSettings(void) opt.WriteCustomCategories(catstr); } - void SmileyCategoryListType::AddAndLoad(const CMStringW &name, const CMStringW &displayName) { if (GetSmileyCategory(name) != nullptr) @@ -848,14 +642,12 @@ void SmileyCategoryListType::AddAndLoad(const CMStringW &name, const CMStringW & m_SmileyCategories[m_SmileyCategories.getCount() - 1].Load(); } - void SmileyCategoryListType::AddCategory(const CMStringW &name, const CMStringW &displayName, SmcType typ, const CMStringW &defaultFilename) { if (GetSmileyCategory(name) == nullptr) m_SmileyCategories.insert(new SmileyCategoryType(m_pSmileyPackStore, name, displayName, defaultFilename, typ)); } - bool SmileyCategoryListType::DeleteCustomCategory(int index) { if (index < m_SmileyCategories.getCount()) { @@ -1056,60 +848,37 @@ static const CMStringW testString(L"Test String"); SmileyLookup::SmileyLookup(const CMStringW &str, const bool regexs, const int ind, const CMStringW &smpt) { - wchar_t msgtxt[1024]; - m_ind = ind; if (regexs) { - m_pattern = _TPattern::compile(str); - m_valid = m_pattern != nullptr; - if (m_valid) { - _TMatcher *matcher = m_pattern->createWCMatcher(testString); - m_valid &= (!matcher->findFirstMatch() || - matcher->getStartingIndex() != matcher->getEndingIndex()); - if (!m_valid) { - static const wchar_t errmsg[] = LPGENW("Regular expression \"%s\" in smiley pack \"%s\" could produce \"empty matches\"."); - mir_snwprintf(msgtxt, TranslateW(errmsg), str.c_str(), smpt.c_str()); - } - delete matcher; - } - else { - static const wchar_t errmsg[] = LPGENW("Regular expression \"%s\" in smiley pack \"%s\" malformed."); - mir_snwprintf(msgtxt, TranslateW(errmsg), str.c_str(), smpt.c_str()); - } - - if (!m_valid) + m_pattern.compile(str); + m_valid = m_pattern.isValid(); + if (!m_valid) { + wchar_t msgtxt[1024]; + mir_snwprintf(msgtxt, 
TranslateT("Regular expression \"%s\" in smiley pack \"%s\" malformed."), str.c_str(), smpt.c_str()); Netlib_LogW(hNetlibUser, msgtxt); + } } else { m_text = str; - m_pattern = nullptr; m_valid = !str.IsEmpty(); } } - SmileyLookup::~SmileyLookup() { - delete m_pattern; } - -void SmileyLookup::Find(const CMStringW &str, SmileyLocVecType &smlcur, bool firstOnly) const +void SmileyLookup::Find(const CMStringW &str, SmileyLocVecType &smlcur, bool firstOnly) { if (!m_valid) return; if (m_text.IsEmpty()) { - _TMatcher *matcher = m_pattern->createWCMatcher(str); - while (matcher->findNextMatch()) { - int st = matcher->getStartingIndex(); - int sz = matcher->getEndingIndex() - st; - if (sz != 0) { - smlcur.insert(new SmileyLocType(st, sz)); - if (firstOnly && m_ind != -1) - return; - } + while (m_pattern.nextMatch(str) >= 0) { + CMStringW wszMatch(m_pattern.getMatch()); + smlcur.insert(new SmileyLocType(m_pattern.getPos(), wszMatch.GetLength())); + if (firstOnly && m_ind != -1) + return; } - delete matcher; } else { const wchar_t *pos = str.c_str(); diff --git a/plugins/SmileyAdd/src/smileys.h b/plugins/SmileyAdd/src/smileys.h index e21687d674..b0f1371d8e 100644 --- a/plugins/SmileyAdd/src/smileys.h +++ b/plugins/SmileyAdd/src/smileys.h @@ -130,7 +130,7 @@ template struct SMOBJLIST : public OBJLIST class SmileyLookup { private: - _TPattern *m_pattern; + MRegexp16 m_pattern; int m_ind; CMStringW m_text; @@ -145,11 +145,11 @@ public: }; typedef SMOBJLIST SmileyLocVecType; - SmileyLookup() { m_ind = 0; m_valid = false; m_pattern = NULL; }; + SmileyLookup() : m_pattern(L"") { m_ind = 0; m_valid = false; m_pattern = NULL; }; SmileyLookup(const CMStringW &str, const bool regexs, const int ind, const CMStringW &smpt); ~SmileyLookup(); - void Find(const CMStringW &str, SmileyLocVecType &smlcur, bool firstOnly) const; + void Find(const CMStringW &str, SmileyLocVecType &smlcur, bool firstOnly); int GetIndex(void) const { return m_ind; } bool IsValid(void) const { return m_valid; } 
}; @@ -183,7 +183,6 @@ private: void AddTriggersToSmileyLookup(void); void ReplaceAllSpecials(const CMStringW &Input, CMStringW &Output); bool LoadSmileyFileMSL(CMStringW &tbuf, bool onlyInfo, CMStringW &modpath); - bool LoadSmileyFileXEP(CMStringW &tbuf, bool onlyInfo, CMStringW &modpath); public: SmileyPackType(); diff --git a/plugins/SmileyAdd/src/stdafx.h b/plugins/SmileyAdd/src/stdafx.h index 0beb3b3005..227d1744fb 100644 --- a/plugins/SmileyAdd/src/stdafx.h +++ b/plugins/SmileyAdd/src/stdafx.h @@ -65,6 +65,7 @@ along with this program. If not, see . #include #include #include +#include #include #include @@ -72,12 +73,6 @@ along with this program. If not, see . #include #include -#include "regexp/wcpattern.h" -#include "regexp/wcmatcher.h" - -typedef WCPattern _TPattern; -typedef WCMatcher _TMatcher; - #include "resource.h" #include "version.h" #include "imagecache.h" -- cgit v1.2.3