From a33833212f040272fc6c97047c8cb335b6f5405a Mon Sep 17 00:00:00 2001 From: Vadim Dashevskiy Date: Tue, 24 Jul 2012 06:41:19 +0000 Subject: SimpleAR, SimpleStatusMsg, SmileyAdd, SpellChecker: changed folder structure git-svn-id: http://svn.miranda-ng.org/main/trunk@1149 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- plugins/SmileyAdd/src/regexp/WCMatcher.h | 234 +++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 plugins/SmileyAdd/src/regexp/WCMatcher.h (limited to 'plugins/SmileyAdd/src/regexp/WCMatcher.h') diff --git a/plugins/SmileyAdd/src/regexp/WCMatcher.h b/plugins/SmileyAdd/src/regexp/WCMatcher.h new file mode 100644 index 0000000000..23cc49e41f --- /dev/null +++ b/plugins/SmileyAdd/src/regexp/WCMatcher.h @@ -0,0 +1,234 @@ +#ifndef __WCMATCHER_H__ +#define __WCMATCHER_H__ + +#include "bkstring.h" +#include +#include + +/** + A matcher is a non thread-safe object used to scan strings using a given + {@link WCPattern WCPattern} object. Using a WCMatcher is the preferred + method for scanning strings. WCMatchers are not thread-safe. WCMatchers require + very little dynamic memory, hence one is encouraged to create several + instances of a matcher when necessary as opposed to sharing a single instance + of a matcher. +

+ The most common methods needed by the matcher are matches, + findNextMatch, and getGroup. matches + and findNextMatch both return success or failure, and further + details can be gathered from their documentation. +

+ Unlike Java's WCMatcher, this class allows you to change the string + you are matching against. This provides a small optimization, since you no + longer need multiple matchers for a single pattern in a single thread. +

+ This class also provides an extremely handy method for replacing text with + captured data via the replaceWithGroups method. A typical + invocation looks like: +

+  wchar_t buf[10000];
+  bkstring str = "\\5 (user name \\1) uses \\7 for his/her shell and \\6 is their home directory";
+  FILE * fp = fopen("/etc/passwd", "r");
+  WCPattern::registerWCPattern("entry", "[^:]+");
+  WCPattern * p = WCPattern::compile("^({entry}):({entry}):({entry}):({entry}):({entry}):({entry}):({entry})$",
+                                 WCPattern::MULTILINE_MATCHING | WCPattern::UNIX_LINE_MODE);
+  WCMatcher * m = p->createWCMatcher("");
+  while (fgets(buf, 9999, fp))
+  {
+    m->setString(buf);
+    if (m->matches())
+    {
+      printf("%s\n", m->replaceWithGroups(str).c_str());
+    }
+  }
+  fclose(fp);
+
+  
+ Calling any of the following functions before first calling + matches, findFirstMatch, or + findNextMatch results in undefined behavior and may cause your + program to crash. + +
    +
  • replaceWithGroups +
  • getStartingIndex
  • +
  • getEndingIndex
  • +
  • getGroup
  • +
  • getGroups
  • +
+ +

+ The function findFirstMatch will attempt to find the first match + in the input string. The same results can be obtained by first calling + reset followed by findNextMatch. +

+ To eliminate the necessity of looping through a string to find all the + matching substrings, findAll was created. The function will find + all matching substrings and return them in a vector. If you need + to examine specific capture groups within the substrings, then this method + should not be used. + + @author Jeffery Stuart + @since March 2003, Stable Since November 2004 + @version 1.05.00 + @memo Mutable object used on instances of a WCPattern class + */ +class WCMatcher +{ + friend class NFAUNode; + friend class NFAStartUNode; + friend class NFAEndUNode; + friend class NFAGroupHeadUNode; + friend class NFAGroupLoopUNode; + friend class NFAGroupLoopPrologueUNode; + friend class NFAGroupTailUNode; + friend class NFALookBehindUNode; + friend class NFAStartOfLineUNode; + friend class NFAEndOfLineUNode; + friend class NFAEndOfMatchUNode; + friend class NFAReferenceUNode; + friend class WCPattern; + private: + /** + Creates a new matcher object against text using + pattern. + + @param pattern The pattern with which to search + @param text The text in which to search + */ + WCMatcher(WCPattern * pattern, const bkstring & text); + protected: + /// The pattern we use to match + WCPattern * pat; + /// The string in which we are matching + const bkstring * str; + /// The starting point of our match + int start; + /// An array of the starting positions for each group + int * starts; + /// An array of the ending positions for each group + int * ends; + /// An array of private data used by NFAUNodes during matching + int * groups; + /// An array of private data used by NFAUNodes during matching + int * groupIndeces; + /// An array of private data used by NFAUNodes during matching + int * groupPos; + /// The ending index of the last match + int lm; + /// The number of capturing groups we have + int gc; + /// The number of non-capturing groups we havew + int ncgc; + /// Whether or not we have matched something (used only by findFirstMatch and findNextMatch) + int matchedSomething; + /// The flags with which we were made + unsigned long flags; + /// Called by reset to clear the group arrays + void clearGroups(); + public: + /// Used internally by match to signify we want the entire string matched + const static int MATCH_ENTIRE_STRING; + public: + /// Cleans up the dynamic memory used by this matcher + ~WCMatcher(); + /** + Replaces the contents of str with the appropriate captured + text. str should have at least one back reference, otherwise + this function does nothing. + @param str The string in which to replace text + @return A string with all backreferences appropriately replaced + */ + bkstring replaceWithGroups(const bkstring & str); + /** + The flags currently being used by the matcher. + @return Zero + */ + unsigned long getFlags() const; + /** + The text being searched by the matcher. + @return the text being searched by the matcher. + */ + const bkstring& getText() const; + + /** + Scans the string from start to finish for a match. The entire string must + match for this function to return success. Group variables are + appropriately set and can be queried after this function returns. + + @return Success if and only if the entire string matches the pattern + */ + bool matches(); + /** + Scans the string for the first substring matching the pattern. The entire + string does not necessarily have to match for this function to return + success. Group variables are appropriately set and can be queried after + this function returns. + + @return Success if any substring matches the specified pattern + */ + bool findFirstMatch(); + /** + Scans the string for the next substring matching the pattern. If no calls + have been made to findFirstMatch of findNextMatch since the last call to + reset, matches, or setString, then this function's behavior results to + that of findFirstMatch. + + @return Success if another substring can be found that matches the pattern + */ + bool findNextMatch(); + /** + Returns a vector of every substring in order which matches the given + pattern. + + @return Every substring in order which matches the given pattern + */ + std::vector findAll(); + /** + Resets the internal state of the matcher + */ + void reset(); + /** + Same as getText. Left n for backwards compatibilty with old source code + @return Returns the string that is currently being used for matching + */ + inline const bkstring& getString() const { return *str; } + /** + Sets the string to scan + @param newStr The string to scan for subsequent matches + */ + inline void setString(const bkstring & newStr) { str = &newStr; reset(); } + + /** + Returns the starting index of the specified group. + @param groupNum The group to query + @return The starting index of the group if it was matched, -1 for an + invalid group or if the group was not matched + */ + int getStartingIndex(const int groupNum = 0) const; + /** + Returns the ending index of the specified group. + @param groupNum The group to query + @return The ending index of the group if it was matched, -1 for an + invalid group or if the group was not matched + */ + int getEndingIndex(const int groupNum = 0) const; + /** + Returns the specified group. An empty string ("") does not necessarily + mean the group was not matched. A group such as (a*b?) could be matched by + a zero length. If an empty string is returned, getStartingIndex can be + called to determine if the group was actually matched. + @param groupNum The group to query + @return The text of the group + */ + bkstring getGroup(const int groupNum = 0) const; + /** + Returns every capture group in a vector + + @param includeGroupZero Whether or not include capture group zero + @return Every capture group + */ + std::vector getGroups(const bool includeGroupZero = 0) const; +}; + +#endif -- cgit v1.2.3