From f920ef497f3299ae24fe783ce03bdd93b419f764 Mon Sep 17 00:00:00 2001 From: Kirill Volinsky Date: Fri, 18 May 2012 22:02:50 +0000 Subject: plugins folders renaming git-svn-id: http://svn.miranda-ng.org/main/trunk@60 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- plugins/smileyadd/regexp/Pattern.h | 1663 ------------------------------------ 1 file changed, 1663 deletions(-) delete mode 100644 plugins/smileyadd/regexp/Pattern.h (limited to 'plugins/smileyadd/regexp/Pattern.h') diff --git a/plugins/smileyadd/regexp/Pattern.h b/plugins/smileyadd/regexp/Pattern.h deleted file mode 100644 index bb16ad90fa..0000000000 --- a/plugins/smileyadd/regexp/Pattern.h +++ /dev/null @@ -1,1663 +0,0 @@ -#ifndef __PATTERN_H__ -#define __PATTERN_H__ - -#ifdef _WIN32 - #pragma warning(disable:4786) -#endif - -#include -#include - -#include "bkstring.h" - -class Matcher; -class NFANode; -class NFAQuantifierNode; - -/** - This pattern class is very similar in functionality to Java's - java.util.regex.Pattern class. The pattern class represents an immutable - regular expression object. Instead of having a single object contain both the - regular expression object and the matching object, instead the two objects are - split apart. The {@link Matcher Matcher} class represents the maching - object. - - The Pattern class works primarily off of "compiled" patterns. A typical - instantiation of a regular expression looks like: - -
-  Pattern * p = Pattern::compile("a*b");
-  Matcher * m = p->createMatcher("aaaaaab");
-  if (m->matches()) ...
-  
- - However, if you do not need to use a pattern more than once, it is often times - okay to use the Pattern's static methods instead. An example looks like this: - -
-  if (Pattern::matches("a*b", "aaaab")) { ... }
-  
- - This class does not currently support unicode. The unicode update for this - class is coming soon. - - This class is partially immutable. It is completely safe to call createMatcher - concurrently in different threads, but the other functions (e.g. split) should - not be called concurrently on the same Pattern. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Construct - - Matches - -
-   -
- Characters -
- x - - The character x -
- \\ - - The character \ -
- \0nn - - The character with octal ASCII value nn -
- \0nnn - - The character with octal ASCII value nnn -
- \xhh - - The character with hexadecimal ASCII value hh -
- \t - - A tab character -
- \r - - A carriage return character -
- \n - - A new-line character -
-   -
- Character Classes -
- [abc] - - Either a, b, or c -
- [^abc] - - Any character but a, b, or c -
- [a-zA-Z] - - Any character ranging from a thru z, or - A thru Z -
- [^a-zA-Z] - - Any character except those ranging from a thru - z, or A thru Z -
- [a\-z] - - Either a, -, or z -
- [a-z[A-Z]] - - Same as [a-zA-Z] -
- [a-z&&[g-i]] - - Any character in the intersection of a-z and - g-i -
- [a-z&&[^g-i]] - - Any character in a-z and not in g-i -
-   -
- Predefined character classes -
- . - - Any character. Multiline matching must be compiled into the pattern for - . to match a \r or a \n. - Even if multiline matching is enabled, . will not - match a \r\n, only a \r or a \n. -
- \d - - [0-9] -
- \D - - [^\d] -
- \s - - [ \t\r\n\x0B] -
- \S - - [^\s] -
- \w - - [a-zA-Z0-9_] -
- \W - - [^\w] -
-   -
- POSIX character classes -
- \p{Lower} - - [a-z] -
- \p{Upper} - - [A-Z] -
- \p{ASCII} - - [\x00-\x7F] -
- \p{Alpha} - - [a-zA-Z] -
- \p{Digit} - - [0-9] -
- \p{Alnum} - - [\w&&[^_]] -
- \p{Punct} - - [!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~] -
- \p{XDigit} - - [a-fA-F0-9] -
-   -
- Boundary Matches -
- ^ - - The beginning of a line. Also matches the beginning of input. -
- $ - - The end of a line. Also matches the end of input. -
- \b - - A word boundary -
- \B - - A non word boundary -
- \A - - The beginning of input -
- \G - - The end of the previous match. Ensures that a "next" match will only - happen if it begins with the character immediately following the end of - the "current" match. -
- \Z - - The end of input. Will also match if there is a single trailing - \r\n, a single trailing \r, or a single - trailing \n. -
- \z - - The end of input -
-   -
- Greedy Quantifiers -
- x? - - x, either zero times or one time -
- x* - - x, zero or more times -
- x+ - - x, one or more times -
- x{n} - - x, exactly n times -
- x{n,} - - x, at least n times -
- x{,m} - - x, at most m times -
- x{n,m} - - x, at least n times and at most - m times -
-   -
- Possessive Quantifiers -
- x?+ - - x, either zero times or one time -
- x*+ - - x, zero or more times -
- x++ - - x, one or more times -
- x{n}+ - - x, exactly n times -
- x{n,}+ - - x, at least n times -
- x{,m}+ - - x, at most m times -
- x{n,m}+ - - x, at least n times and at most - m times -
-   -
- Reluctant Quantifiers -
- x?? - - x, either zero times or one time -
- x*? - - x, zero or more times -
- x+? - - x, one or more times -
- x{n}? - - x, exactly n times -
- x{n,}? - - x, at least n times -
- x{,m}? - - x, at most m times -
- x{n,m}? - - x, at least n times and at most - m times -
-   -
- Operators -
- xy - - x then y -
- x|y - - x or y -
- (x) - - x as a capturing group -
-   -
- Quoting -
- \Q - - Nothing, but treat every character (including \s) literally until a - matching \E -
- \E - - Nothing, but ends its matching \Q -
-   -
- Special Constructs -
- (?:x) - - x, but not as a capturing group -
- (?=x) - - x, via positive lookahead. This means that the - expression will match only if it is trailed by x. - It will not "eat" any of the characters matched by - x. -
- (?!x) - - x, via negative lookahead. This means that the - expression will match only if it is not trailed by - x. It will not "eat" any of the characters - matched by x. -
- (?<=x) - - x, via positive lookbehind. x - cannot contain any quantifiers. -
- (?<!x) - - x, via negative lookbehind. x - cannot contain any quantifiers. -
- (?>x) - - x{1}+ -
-   -
- Registered Expression Matching -
- {x} - - The registered pattern x -
- -
- - Begin Text Extracted And Modified From java.util.regex.Pattern documentation - -

Backslashes, escapes, and quoting

- -

The backslash character ('\') serves to introduce escaped - constructs, as defined in the table above, as well as to quote characters - that otherwise would be interpreted as unescaped constructs. Thus the - expression \\ matches a single backslash and \{ matches a - left brace. - -

It is an error to use a backslash prior to any alphabetic character that - does not denote an escaped construct; these are reserved for future - extensions to the regular-expression language. A backslash may be used - prior to a non-alphabetic character regardless of whether that character is - part of an unescaped construct. - -

It is necessary to double backslashes in string literals that represent - regular expressions to protect them from interpretation by a compiler. The - string literal "\b", for example, matches a single backspace - character when interpreted as a regular expression, while - "\\b" matches a word boundary. The string literal - "\(hello\)" is illegal and leads to a compile-time error; - in order to match the string (hello) the string literal - "\\(hello\\)" must be used. - -

Character Classes

- -

Character classes may appear within other character classes, and - may be composed by the union operator (implicit) and the intersection - operator (&&). - The union operator denotes a class that contains every character that is - in at least one of its operand classes. The intersection operator - denotes a class that contains every character that is in both of its - operand classes. - -

The precedence of character-class operators is as follows, from - highest to lowest: - -

- - - - - - - - - - - - - - - - -
1    Literal escape    \x
2    Rangea-z
3    Grouping[...]
4    Intersection[a-z&&[aeiou]]
5    Union[a-e][i-u]
- -

Note that a different set of metacharacters are in effect inside - a character class than outside a character class. For instance, the - regular expression . loses its special meaning inside a - character class, while the expression - becomes a range - forming metacharacter. - - - - -

Groups and capturing

- -

Capturing groups are numbered by counting their opening parentheses from - left to right. In the expression ((A)(B(C))), for example, there - are four such groups:

- -
- - - - - - - - - - -
1    ((A)(B(C)))
2    (A)
3    (B(C))
4    (C)
- -

Group zero always stands for the entire expression. - -

Capturing groups are so named because, during a match, each subsequence - of the input sequence that matches such a group is saved. The captured - subsequence may be used later in the expression, via a back reference, and - may also be retrieved from the matcher once the match operation is complete. - -

The captured input associated with a group is always the subsequence - that the group most recently matched. If a group is evaluated a second time - because of quantification then its previously-captured value, if any, will - be retained if the second evaluation fails. Matching the string - "aba" against the expression (a(b)?)+, for example, leaves - group two set to "b". All captured input is discarded at the - beginning of each match. - -

Groups beginning with (? are pure, non-capturing groups - that do not capture text and do not count towards the group total. - - -

Unicode support

- -

Coming Soon. - -

Comparison to Perl 5

- -

The Pattern engine performs traditional NFA-based matching - with ordered alternation as occurs in Perl 5. - -

Perl constructs not supported by this class:

- -
    - -
  • The conditional constructs (?{X}) and - (?(condition)X|Y), -

  • - -
  • The embedded code constructs (?{code}) - and (??{code}),

  • - -
  • The embedded comment syntax (?#comment), and

  • - -
  • The preprocessing operations \l \u, - \L, and \U.

  • - -
  • Embedded flags

  • - -
- -

Constructs supported by this class but not by Perl:

- -
    - -
  • Possessive quantifiers, which greedily match as much as they can - and do not back off, even when doing so would allow the overall match to - succeed.

  • - -
  • Character-class union and intersection as described - above.

  • - -
- -

Notable differences from Perl:

- -
    - -
  • In Perl, \1 through \9 are always interpreted - as back references; a backslash-escaped number greater than 9 is - treated as a back reference if at least that many subexpressions exist, - otherwise it is interpreted, if possible, as an octal escape. In this - class octal escapes must always begin with a zero. In this class, - \1 through \9 are always interpreted as back - references, and a larger number is accepted as a back reference if at - least that many subexpressions exist at that point in the regular - expression, otherwise the parser will drop digits until the number is - smaller or equal to the existing number of groups or it is one digit. -

  • - -
  • Perl uses the g flag to request a match that resumes - where the last match left off. This functionality is provided implicitly - by the Matcher class: Repeated invocations of the - find method will resume where the last match left off, - unless the matcher is reset.

  • - -
  • Perl is forgiving about malformed matching constructs, as in the - expression *a, as well as dangling brackets, as in the - expression abc], and treats them as literals. This - class is also strict and will not compile a pattern when dangling characters - are encountered.

  • - -
- - -

For a more precise description of the behavior of regular expression - constructs, please see - Mastering Regular Expressions, 2nd Edition, Jeffrey E. F. Friedl, - O'Reilly and Associates, 2002. -

-

- - End Text Extracted And Modified From java.util.regex.Pattern documentation - -


- - @author Jeffery Stuart - @since March 2003, Stable Since November 2004 - @version 1.07.00 - @memo A class used to represent "PERL 5"-ish regular expressions - */ -class Pattern -{ - friend class Matcher; - friend class NFANode; - friend class NFAQuantifierNode; - private: - /** - This constructor should not be called directly. Those wishing to use the - Pattern class should instead use the {@link compile compile} method. - - @param rhs The pattern to compile - @memo Creates a new pattern from the regular expression in rhs. - */ - Pattern(const bkstring & rhs); - protected: - /** - This currently is not used, so don't try to do anything with it. - @memo Holds all the compiled patterns for quick access. - */ - static std::map compiledPatterns; - /** - Holds all of the registered patterns as strings. Due to certain problems - with compilation of patterns, especially with capturing groups, this seemed - to be the best way to do it. - */ - static std::map > registeredPatterns; - protected: - /** - Holds all the NFA nodes used. This makes deletion of a pattern, as well as - clean-up from an unsuccessful compile much easier and faster. - */ - std::map nodes; - /** - Used when methods like split are called. The matcher class uses a lot of - dynamic memory, so having an instance increases speedup of certain - operations. - */ - Matcher * matcher; - /** - The front node of the NFA. - */ - NFANode * head; - /** - The actual regular expression we represent - */ - bkstring pattern; - /** - Flag used during compilation. Once the pattern is successfully compiled, - error is no longer used. - */ - bool error; - /** - Used during compilation to keep track of the current index into - {@link pattern pattern}. Once the pattern is successfully - compiled, curInd is no longer used. - */ - int curInd; - /** - The number of capture groups this contains. - */ - int groupCount; - /** - The number of non-capture groups this contains.
- */ - int nonCapGroupCount; - /** - The flags specified when this was compiled. - */ - unsigned long flags; - protected: - /** - Raises an error during compilation. Compilation will cease at that point - and compile will return NULL. - */ - void raiseError(); - /** - Convenience function for registering a node in nodes. - @param node The node to register - @return The registered node - */ - NFANode * registerNode(NFANode * node); - - /** - Calculates the union of two strings. This function will first sort the - strings and then use a simple selection algorithm to find the union. - @param s1 The first "class" to union - @param s2 The second "class" to union - @return A new string containing all unique characters. Each character - must have appeared in one or both of s1 and - s2. - */ - bkstring classUnion (bkstring s1, bkstring s2) const; - /** - Calculates the intersection of two strings. This function will first sort - the strings and then use a simple selection algorithm to find the - intersection. - @param s1 The first "class" to intersect - @param s2 The second "class" to intersect - @return A new string containing all unique characters. Each character - must have appeared in both s1 and s2. - */ - bkstring classIntersect (bkstring s1, bkstring s2) const; - /** - Calculates the negation of a string. The negation is the set of all - characters between \x00 and \xFF not - contained in s1. - @param s1 The "class" to be negated. - @return A new string containing all unique characters between - \x00 and \xFF that do not appear in s1. - */ - bkstring classNegate (bkstring s1) const; - /** - Creates a new "class" representing the range from low thru - hi. This function will wrap if low > - hi. This is a feature, not a bug. Sometimes it is useful - to be able to say [\x70-\x10] instead of [\x70-\x7F\x00-\x10].
- @param low The beginning character - @param hi The ending character - @return A new string containing all the characters from low thru hi. - */ - bkstring classCreateRange(char low, char hi) const; - - /** - Extracts a decimal number from the substring of member-variable - {@link pattern pattern} starting at start and - ending at end. - @param start The starting index in {@link pattern pattern} - @param end The last index in {@link pattern pattern} - @return The decimal number in {@link pattern pattern} - */ - int getInt(int start, int end); - /** - Parses a {n,m} string out of the member-variable - {@link pattern pattern} stores the result in sNum - and eNum. - @param sNum Output parameter. The minimum number of matches required - by the curly quantifier are stored here. - @param eNum Output parameter. The maximum number of matches allowed - by the curly quantifier are stored here. - @return Success/Failure. Fails when the curly does not have the proper - syntax - */ - bool quantifyCurly(int & sNum, int & eNum); - /** - Tries to quantify the currently parsed group. If the group being parsed - is indeed quantified in the member-variable - {@link pattern pattern}, then the NFA is modified accordingly. - @param start The starting node of the current group being parsed - @param stop The ending node of the current group being parsed - @param gn The group number of the current group being parsed - @return The node representing the starting node of the group. If the - group becomes quantified, then this node is not necessarily - a GroupHead node. - */ - NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn); - - /** - Tries to quantify the last parsed expression. If the character was indeed - quantified, then the NFA is modified accordingly. - @param newNode The recently created expression node - @return The node representing the last parsed expression. 
If the - expression was quantified, return value != newNode - */ - NFANode * quantify(NFANode * newNode); - /** - Parses the current class being examined in - {@link pattern pattern}. - @return A string of unique characters contained in the current class being - parsed - */ - bkstring parseClass(); - /** - Parses the current POSIX class being examined in - {@link pattern pattern}. - @return A string of unique characters representing the POSIX class being - parsed - */ - bkstring parsePosix(); - /** - Returns a string containing the octal character being parsed - @return The string contained the octal value being parsed - */ - bkstring parseOctal(); - /** - Returns a string containing the hex character being parsed - @return The string contained the hex value being parsed - */ - bkstring parseHex(); - /** - Returns a new node representing the back reference being parsed - @return The new node representing the back reference being parsed - */ - NFANode * parseBackref(); - /** - Parses the escape sequence currently being examined. Determines if the - escape sequence is a class, a single character, or the beginning of a - quotation sequence. - @param inv Output parameter. Whether or not to invert the returned class - @param quo Output parameter. Whether or not this sequence starts a - quotation. - @return The characters represented by the class - */ - bkstring parseEscape(bool & inv, bool & quo); - /** - Parses a supposed registered pattern currently under compilation. If the - sequence of characters does point to a registered pattern, then the - registered pattern is appended to *end. The registered pattern - is parsed with the current compilation flags. - @param end The ending node of the thus-far compiled pattern - @return The new end node of the current pattern - */ - NFANode * parseRegisteredPattern(NFANode ** end); - /** - Parses a lookbehind expression. Appends the necessary nodes - *end. 
- @param pos Positive or negative look behind - @param end The ending node of the current pattern - @return The new end node of the current pattern - */ - NFANode * parseBehind(const bool pos, NFANode ** end); - /** - Parses the current expression and tacks on nodes until a \E is found. - @return The end of the current pattern - */ - NFANode * parseQuote(); - /** - Parses {@link pattern pattern}. This function is called - recursively when an or (|) or a group is encountered. - @param inParen Are we currently parsing inside a group - @param inOr Are we currently parsing one side of an or (|) - @param end The end of the current expression - @return The starting node of the NFA constructed from this parse - */ - NFANode * parse(const bool inParen = 0, const bool inOr = 0, NFANode ** end = NULL); - public: - /// We should match regardless of case - const static unsigned long CASE_INSENSITIVE; - /// We are implicitly quoted - const static unsigned long LITERAL; - /// @memo We should treat a . as [\x00-\x7F] - const static unsigned long DOT_MATCHES_ALL; - /** ^ and $ should anchor to the beginning and - ending of lines, not all input - */ - const static unsigned long MULTILINE_MATCHING; - /** When enabled, only instances of \n are recognized as - line terminators - */ - const static unsigned long UNIX_LINE_MODE; - /// The absolute minimum number of matches a quantifier can match (0) - const static int MIN_QMATCH; - /// The absolute maximum number of matches a quantifier can match (0x7FFFFFFF) - const static int MAX_QMATCH; - public: - /** - Call this function to compile a regular expression into a - Pattern object. Special values can be assigned to - mode when certain non-standard behaviors are expected from - the Pattern object. - @param pattern The regular expression to compile - @param mode A bitwise or of flags signalling what special behaviors are - wanted from this Pattern object - @return If successful, compile returns a Pattern - pointer. 
Upon failure, compile returns - NULL - */ - static Pattern * compile (const bkstring & pattern, - const unsigned long mode = 0); - /** - Don't use this function. This function will compile a pattern, and cache - the result. This will eventually be used as an optimization when people - just want to call static methods using the same pattern over and over - instead of first compiling the pattern and then using the compiled - instance for matching. - @param pattern The regular expression to compile - @param mode A bitwise or of flags signalling what special behaviors are - wanted from this Pattern object - @return If successful, compileAndKeep returns a - Pattern pointer. Upon failure, compileAndKeep - returns NULL. - */ - static Pattern * compileAndKeep (const bkstring & pattern, - const unsigned long mode = 0); - - /** - Searches through str and replaces all substrings matched - by pattern with replacementText. replacementText may - contain backreferences (e.g. \1) to capture groups. A typical - invocation looks like: -

- - Pattern::replace("(a+)b(c+)", "abcccbbabcbabc", "\\2b\\1"); - -

- which would replace abcccbbabcbabc with - cccbabbcbabcba. - @param pattern The regular expression - @param str The string in which to perform replacements - @param replacementText The replacement text - @param mode The special mode requested of the Pattern - during the replacement process - @return The text with the replacement string substituted where necessary - */ - static bkstring replace (const bkstring & pattern, - const bkstring & str, - const bkstring & replacementText, - const unsigned long mode = 0); - - /** - Splits the specified string over occurrences of the specified pattern. - Empty strings can be optionally ignored. The number of strings returned is - configurable. A typical invocation looks like: -

- - bkstring str(strSize, '\0');
- FILE * fp = fopen(fileName, "r");
- fread((char*)str.data(), strSize, 1, fp);
- fclose(fp);
-
- std::vector<bkstring> lines = Pattern::split("[\r\n]+", str, true);
-
-
- - @param pattern The regular expression - @param str The string to split - @param keepEmptys Whether or not to keep empty strings - @param limit The maximum number of splits to make - @param mode The special mode requested of the Pattern - during the split process - @return All substrings of str split across pattern. - */ - static std::vector split (const bkstring & pattern, - const bkstring & str, - const bool keepEmptys = 0, - const unsigned long limit = 0, - const unsigned long mode = 0); - - /** - Finds all the instances of the specified pattern within the string. You - should be careful to only pass patterns with a minimum length of one. For - example, the pattern a* can be matched by an empty string, so - instead you should pass a+ since at least one character must - be matched. A typical invocation of findAll looks like: -

- - std::vector<bkstring> numbers = Pattern::findAll("\\d+", string); - -

- - @param pattern The pattern for which to search - @param str The string to search - @param mode The special mode requested of the Pattern - during the find process - @return All instances of pattern in str - */ - static std::vector findAll (const bkstring & pattern, - const bkstring & str, - const unsigned long mode = 0); - - /** - Determines if an entire string matches the specified pattern - - @param pattern The pattern to match - @param str The string to match - @param mode The special mode requested of the Pattern - during the matching process - @return True if str is recognized by pattern - */ - static bool matches (const bkstring & pattern, - const bkstring & str, - const unsigned long mode = 0); - - /** - Registers a pattern under a specific name for use in later compilations. - A typical invocation and later use looks like: -

- - Pattern::registerPattern("ip", "(?:\\d{1,3}\\.){3}\\d{1,3}");
- Pattern * p1 = Pattern::compile("{ip}:\\d+");
- Pattern * p2 = Pattern::compile("Connection from ({ip}) on port \\d+");
-
-

- Multiple calls to registerPattern with the same - name will result in the pattern getting overwritten. - - @param name The name to give to the pattern - @param pattern The pattern to register - @param mode Any special flags to use when compiling pattern - @return Success/Failure. Fails only if pattern has invalid - syntax - */ - static bool registerPattern(const bkstring & name, - const bkstring & pattern, - const unsigned long mode = 0); - - /** - Clears the pattern registry - */ - static void unregisterPatterns(); - /** - Don't use - */ - static void clearPatternCache(); - - /** - Searches through a string for the nth match of the - given pattern in the string. Match indices start at zero, not one. - A typical invocation looks like this: -

- - std::pair<bkstring, int> match = Pattern::findNthMatch("\\d{1,3}", "192.168.1.101:22", 1);
- printf("%s %i\n", match.first.c_str(), match.second);
-
- Output: 168 4
-
- - @param pattern The pattern for which to search - @param str The string to search - @param matchNum Which match to find - @param mode Any special flags to use during the matching process - @return A string and an integer. The string is the string matched. The - integer is the starting location of the matched string in - str. You can check for success/failure by making sure - that the integer returned is greater than or equal to zero. - */ - static std::pair findNthMatch (const bkstring & pattern, - const bkstring & str, - const int matchNum, - const unsigned long mode = 0); - public: - /** - Deletes all NFA nodes allocated during compilation - */ - ~Pattern(); - - bkstring replace (const bkstring & str, - const bkstring & replacementText); - std::vector split (const bkstring & str, const bool keepEmptys = 0, - const unsigned long limit = 0); - std::vector findAll (const bkstring & str); - bool matches (const bkstring & str); - /** - Returns the flags used during compilation of this pattern - @return The flags used during compilation of this pattern - */ - unsigned long getFlags () const; - /** - Returns the regular expression this pattern represents - @return The regular expression this pattern represents - */ - bkstring getPattern () const; - /** - Creates a matcher object using the specified string and this pattern. 
- @param str The string to match against - @return A new matcher using object using this pattern and the specified - string - */ - Matcher * createMatcher (const bkstring & str); -}; - -class NFANode -{ - friend class Matcher; - public: - NFANode * next; - NFANode(); - virtual ~NFANode(); - virtual void findAllNodes(std::map & soFar); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const = 0; - inline virtual bool isGroupHeadNode() const { return false; } - inline virtual bool isStartOfInputNode() const { return false; } -}; -class NFACharNode : public NFANode -{ - protected: - char ch; - public: - NFACharNode(const char c); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFACICharNode : public NFANode -{ - protected: - char ch; - public: - NFACICharNode(const char c); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAStartNode : public NFANode -{ - public: - NFAStartNode(); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAEndNode : public NFANode -{ - public: - NFAEndNode(); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAQuantifierNode : public NFANode -{ - public: - int min, max; - NFANode * inner; - virtual void findAllNodes(std::map & soFar); - NFAQuantifierNode(Pattern * pat, NFANode * internal, - const int minMatch = Pattern::MIN_QMATCH, - const int maxMatch = Pattern::MAX_QMATCH); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAGreedyQuantifierNode : public NFAQuantifierNode -{ - public: - NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal, - const int minMatch = Pattern::MIN_QMATCH, - const int maxMatch = Pattern::MAX_QMATCH); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; - virtual int 
matchInternal(const bkstring & str, Matcher * matcher, const int curInd, const int soFar) const; -}; -class NFALazyQuantifierNode : public NFAQuantifierNode -{ - public: - NFALazyQuantifierNode(Pattern * pat, NFANode * internal, - const int minMatch = Pattern::MIN_QMATCH, - const int maxMatch = Pattern::MAX_QMATCH); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAPossessiveQuantifierNode : public NFAQuantifierNode -{ - public: - NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal, - const int minMatch = Pattern::MIN_QMATCH, - const int maxMatch = Pattern::MAX_QMATCH); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAAcceptNode : public NFANode -{ - public: - NFAAcceptNode(); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAClassNode : public NFANode -{ - public: - bool inv; - std::map vals; - NFAClassNode(const bool invert = 0); - NFAClassNode(const bkstring & clazz, const bool invert); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFACIClassNode : public NFANode -{ - public: - bool inv; - std::map vals; - NFACIClassNode(const bool invert = 0); - NFACIClassNode(const bkstring & clazz, const bool invert); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFASubStartNode : public NFANode -{ - public: - NFASubStartNode(); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAOrNode : public NFANode -{ - public: - NFANode * one; - NFANode * two; - NFAOrNode(NFANode * first, NFANode * second); - virtual void findAllNodes(std::map & soFar); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAQuoteNode : public NFANode -{ - public: - bkstring qStr; - NFAQuoteNode(const bkstring & quoted); - 
virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFACIQuoteNode : public NFANode -{ - public: - bkstring qStr; - NFACIQuoteNode(const bkstring & quoted); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFALookAheadNode : public NFANode -{ - public: - bool pos; - NFANode * inner; - NFALookAheadNode(NFANode * internal, const bool positive); - virtual void findAllNodes(std::map & soFar); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFALookBehindNode : public NFANode -{ - public: - bool pos; - bkstring mStr; - NFALookBehindNode(const bkstring & str, const bool positive); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAStartOfLineNode : public NFANode -{ - public: - NFAStartOfLineNode(); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAEndOfLineNode : public NFANode -{ - public: - NFAEndOfLineNode(); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAReferenceNode : public NFANode -{ - public: - int gi; - NFAReferenceNode(const int groupIndex); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAStartOfInputNode : public NFANode -{ - public: - NFAStartOfInputNode(); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; - inline virtual bool isStartOfInputNode() const { return true; } -}; -class NFAEndOfInputNode : public NFANode -{ - public: - bool term; - NFAEndOfInputNode(const bool lookForTerm); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAWordBoundaryNode : public NFANode -{ - public: - bool pos; - NFAWordBoundaryNode(const bool positive); - virtual int match(const bkstring & str, Matcher * matcher, const 
int curInd = 0) const; -}; -class NFAEndOfMatchNode : public NFANode -{ - public: - NFAEndOfMatchNode(); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAGroupHeadNode : public NFANode -{ - public: - int gi; - NFAGroupHeadNode(const int groupIndex); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; - inline virtual bool isGroupHeadNode() const { return true; } -}; -class NFAGroupTailNode : public NFANode -{ - public: - int gi; - NFAGroupTailNode(const int groupIndex); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAGroupLoopPrologueNode : public NFANode -{ - public: - int gi; - NFAGroupLoopPrologueNode(const int groupIndex); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; -class NFAGroupLoopNode : public NFANode -{ - public: - int gi, min, max, type; - NFANode * inner; - NFAGroupLoopNode(NFANode * internal, const int minMatch, - const int maxMatch, const int groupIndex, const int matchType); - virtual void findAllNodes(std::map & soFar); - virtual int match(const bkstring & str, Matcher * matcher, const int curInd = 0) const; - int matchGreedy(const bkstring & str, Matcher * matcher, const int curInd = 0) const; - int matchLazy(const bkstring & str, Matcher * matcher, const int curInd = 0) const; - int matchPossessive(const bkstring & str, Matcher * matcher, const int curInd = 0) const; -}; - -#endif - -- cgit v1.2.3