diff options
author | mataes2007 <mataes2007@e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb> | 2011-11-26 15:41:10 +0000 |
---|---|---|
committer | mataes2007 <mataes2007@e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb> | 2011-11-26 15:41:10 +0000 |
commit | f04d64869f3b1de54fb343f28f955584780001b8 (patch) | |
tree | 5453dc10de3d980de79ffe019fa0b5fcb692a27d /importtxt/PerlRegEx.pas | |
parent | 7aff1e4cb053394db57c2814d5fe1e6493e0cc75 (diff) |
Project folders rename part 3
git-svn-id: http://miranda-plugins.googlecode.com/svn/trunk@215 e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb
Diffstat (limited to 'importtxt/PerlRegEx.pas')
-rw-r--r-- | importtxt/PerlRegEx.pas | 447 |
1 files changed, 0 insertions, 447 deletions
diff --git a/importtxt/PerlRegEx.pas b/importtxt/PerlRegEx.pas deleted file mode 100644 index 617479f..0000000 --- a/importtxt/PerlRegEx.pas +++ /dev/null @@ -1,447 +0,0 @@ -(**********************************************************
-* *
-* Perl Regular Expressions *
-* *
-* Delphi wrapper around PCRE - http://www.pcre.org *
-* *
-* Copyright (C) 1999-2006 Jan Goyvaerts *
-* *
-* Design & implementation Jan Goyvaerts 1999-2006 *
-* *
-* *
-* Shorted and added partial ability for PCRE 7.0 *
-* by Abyss *
-**********************************************************)
-
-unit PerlRegEx;
-
-interface
-
-
-
-uses SysUtils;
-
-type
- TPerlRegExOptions = set of (
- preCaseLess, // /i -> Case insensitive
- preMultiLine, // /m -> ^ and $ also match before/after a newline, not just at the beginning and the end of the string
- preSingleLine, // /s -> Dot matches any character, including \n (newline). Otherwise, it matches anything except \n
- preExtended, // /x -> Allow regex to contain extra whitespace, newlines and Perl-style comments, all of which will be filtered out
- preAnchored, // /A -> Successful match can only occur at the start of the subject or right after the previous match
- preDollarEndOnly, // /E
- preExtra, // /X
- preUnGreedy, // Repeat operators (+, *, ?) are not greedy by default
- // (i.e. they try to match the minimum number of characters instead of the maximum)
- preUTF8 // UTF8
- );
-type
- TPerlRegExState = set of (
- preNotBOL, // Not Beginning Of Line: ^ does not match at the start of Subject
- preNotEOL, // Not End Of Line: $ does not match at the end of Subject
- preNotEmpty // Empty matches not allowed
- );
-
-const
- // Maximum number of subexpressions (backreferences)
- // Subexpressions are created by placing round brackets in the regex, and are referenced by \1, \2, ...
- // In Perl, they are available as $1, $2, ... after the regex matched; with TPerlRegEx, use the Subexpressions property
- // You can also insert \1, \2, ... in the Replacement string; \0 is the complete matched expression
- MAX_SUBEXPRESSIONS = 99;
-
-
-type
- TPerlRegEx = class
- private // *** Property storage, getters and setters
- FCompiled, FStudied: Boolean;
- FOptions: TPerlRegExOptions;
- FState: TPerlRegExState;
- FRegEx, FSubject: string;
- FStart, FStop: Integer;
- function GetMatchedExpression: string;
- function GetMatchedExpressionLength: Integer;
- function GetMatchedExpressionOffset: Integer;
- procedure SetOptions(Value: TPerlRegExOptions);
- procedure SetRegEx(const Value: string);
- function GetSubExpressionCount: Integer;
- function GetSubExpressions(Index: Integer): string;
- function GetSubExpressionLengths(Index: Integer): Integer;
- function GetSubExpressionOffsets(Index: Integer): Integer;
- procedure SetSubject(const Value: string);
- procedure SetStart(const Value: Integer);
- procedure SetStop(const Value: Integer);
- function GetFoundMatch: Boolean;
- private // *** Variables used by pcrelib.dll
- Offsets: array[0..(MAX_SUBEXPRESSIONS+1)*3] of Integer;
- OffsetCount: Integer;
- pcreOptions: Integer;
- pattern, hints, chartable: Pointer;
- FSubjectPChar: PChar;
- protected
- procedure CleanUp;
- // Dispose off whatever we created, so we can start over. Called automatically when needed, so it is not made public
- public
- constructor Create;
- // Come to life
- destructor Destroy; override;
- // Clean up after ourselves
- class function EscapeRegExChars(const S: string): string;
- // Escapes regex characters in S so that the regex engine can be used to match S as plain text
- procedure Compile;
- // Compile the regex. Called automatically by Match
- procedure Study;
- // Study the regex. Studying takes time, but will make the execution of the regex a lot faster.
- // Call study if you will be using the same regex many times
- function Match: Boolean;
- // Attempt to match the regex
- function MatchAgain: Boolean;
- // Attempt to match the regex to the remainder of the string after the previous match
- // To avoid problems (when using ^ in the regex), call MatchAgain only after a succesful Match()
- function NamedSubExpression(const SEName: string): Integer;
- // Returns the index of the named group SEName
- function Config (What: integer): Integer;
- //This function makes it possible for a client program
- // to find out which optional features are available in the
- //version of the PCRE library it is using.
- property Compiled: Boolean read FCompiled;
- // True if the RegEx has already been compiled.
- property FoundMatch: Boolean read GetFoundMatch;
- // Returns True when MatchedExpression* and SubExpression* indicate a match
- property Studied: Boolean read FStudied;
- // True if the RegEx has already been studied
- property MatchedExpression: string read GetMatchedExpression;
- // The matched string
- property MatchedExpressionLength: Integer read GetMatchedExpressionLength;
- // Length of the matched string
- property MatchedExpressionOffset: Integer read GetMatchedExpressionOffset;
- // Character offset in the Subject string at which the matched substring starts
- property Start: Integer read FStart write SetStart;
- // Starting position in Subject from which MatchAgain begins
- property Stop: Integer read FStop write SetStop;
- // Last character in Subject that Match and MatchAgain search through
- property State: TPerlRegExState read FState write FState;
- // State of Subject
- property SubExpressionCount: Integer read GetSubExpressionCount;
- // Number of matched subexpressions
- property SubExpressions[Index: Integer]: string read GetSubExpressions;
- // Matched subexpressions after a regex has been matched
- property SubExpressionLengths[Index: Integer]: Integer read GetSubExpressionLengths;
- // Lengths of the subexpressions
- property SubExpressionOffsets[Index: Integer]: Integer read GetSubExpressionOffsets;
- // Character offsets in the Subject string of the subexpressions
- property Subject: string read FSubject write SetSubject;
- // The string on which Match() will try to match RegEx
- published
- property Options: TPerlRegExOptions read FOptions write SetOptions;
- // Options
- property RegEx: string read FRegEx write SetRegEx;
- // The regular expression to be matched
- end;
-
-
-
-implementation
-
- { ********* pcrelib.dll imports ********* }
-
-const
- PCRE_CASELESS = $00000001;
- PCRE_MULTILINE = $00000002;
- PCRE_SINGLELINE = $00000004;
- PCRE_EXTENDED = $00000008;
- PCRE_ANCHORED = $00000010;
- PCRE_DOLLAR_ENDONLY = $00000020;
- PCRE_EXTRA = $00000040;
- PCRE_NOTBOL = $00000080;
- PCRE_NOTEOL = $00000100;
- PCRE_UNGREEDY = $00000200;
- PCRE_NOTEMPTY = $00000400;
- PCRE_UTF8 = $00000800;
- PCRE_NO_AUTO_CAPTURE = $00001000;
- PCRE_NO_UTF8_CHECK = $00002000;
- PCRE_AUTO_CALLOUT = $00004000;
- PCRE_PARTIAL = $00008000;
- PCRE_DFA_SHORTEST = $00010000;
- PCRE_DFA_RESTART = $00020000;
- PCRE_FIRSTLINE = $00040000;
- PCRE_DUPNAMES = $00080000;
- PCRE_NEWLINE_CR = $00100000;
- PCRE_NEWLINE_LF = $00200000;
- PCRE_NEWLINE_CRLF = $00300000;
- PCRE_NEWLINE_ANY = $00400000;
-
- // Exec error codes
- PCRE_ERROR_NOMATCH = -1;
- PCRE_ERROR_NULL = -2;
- PCRE_ERROR_BADOPTION = -3;
- PCRE_ERROR_BADMAGIC = -4;
- PCRE_ERROR_UNKNOWN_OPCODE = -5;
- PCRE_ERROR_UNKNOWN_NODE = -5; // For backward compatibility
- PCRE_ERROR_NOMEMORY = -6;
- PCRE_ERROR_NOSUBSTRING = -7;
- PCRE_ERROR_MATCHLIMIT = -8;
- PCRE_ERROR_CALLOUT = -9; // Never used by PCRE itself
- PCRE_ERROR_BADUTF8 =-10;
- PCRE_ERROR_BADUTF8_OFFSET =-11;
- PCRE_ERROR_PARTIAL =-12;
- PCRE_ERROR_BADPARTIAL =-13;
- PCRE_ERROR_INTERNAL =-14;
- PCRE_ERROR_BADCOUNT =-15;
- PCRE_ERROR_DFA_UITEM =-16;
- PCRE_ERROR_DFA_UCOND =-17;
- PCRE_ERROR_DFA_UMLIMIT =-18;
- PCRE_ERROR_DFA_WSSIZE =-19;
- PCRE_ERROR_DFA_RECURSE =-20;
- PCRE_ERROR_RECURSIONLIMIT =-21;
- PCRE_ERROR_NULLWSLIMIT =-22;
- PCRE_ERROR_BADNEWLINE =-23;
-
-(* Request types for pcre_config(). Do not re-arrange, in order to remain compatible. *)
-
- PCRE_CONFIG_UTF8 = 0;
- PCRE_CONFIG_NEWLINE = 1;
- PCRE_CONFIG_LINK_SIZE = 2;
- PCRE_CONFIG_POSIX_MALLOC_THRESHOLD = 3;
- PCRE_CONFIG_MATCH_LIMIT = 4;
- PCRE_CONFIG_STACKRECURSE = 5;
- PCRE_CONFIG_UNICODE_PROPERTIES = 6;
- PCRE_CONFIG_MATCH_LIMIT_RECURSION = 7;
-type
- PPChar = ^PChar;
- PInt = ^Integer;
-
-
-
-// Functions we import from the PCRE library DLL
-// Leading underscores gratuitously added by Borland C++Builder 6.0
-function pcre_maketables: PAnsiChar; cdecl; external 'pcre3.dll';
-function pcre_compile(const pattern: PChar; options: Integer; errorptr: PPChar; erroroffset: PInt;
- const tables: PChar): Pointer; cdecl; external 'pcre3.dll';
-function pcre_exec(const pattern: Pointer; const hints: Pointer; const subject: PChar; length, startoffset: Integer;
- options: Integer; offsets: PInt; offsetcount: Integer): Integer; cdecl; external 'pcre3.dll';
-function pcre_get_stringnumber(const pattern: Pointer; const Name: PChar): Integer; cdecl; external 'pcre3.dll';
-function pcre_study(const pattern: Pointer; options: Integer; errorptr: PPChar): Pointer; cdecl; external 'pcre3.dll';
-function pcre_fullinfo(const pattern: Pointer; const hints: Pointer; what: Integer; where: Pointer): Integer; cdecl; external 'pcre3.dll';
-function pcre_version: pchar; cdecl; external 'pcre3.dll';
-function pcre_config (what:integer; where:pointer):integer; cdecl; external 'pcre3.dll';
-//procedure pcre_free(ptr: Pointer); cdecl; external 'pcre3.dll';
-
-
-
-
- { ********* TPerlRegEx component ********* }
-
-procedure TPerlRegEx.CleanUp;
-begin
- FCompiled := False; FStudied := False;
- pattern := nil; hints := nil;
- OffsetCount := 0;
-end;
-
-procedure TPerlRegEx.Compile;
-var
- Error: PChar;
- ErrorOffset: Integer;
-begin
- if FRegEx = '' then raise Exception.Create('TPerlRegEx.Compile() - Please specify a regular expression in RegEx first');
- CleanUp;
- Pattern := pcre_compile(PChar(FRegEx), pcreOptions, @Error, @ErrorOffset, chartable);
- if Pattern = nil then
- raise Exception.Create(Format('TPerlRegEx.Compile() - Error in regex at offset %d: %s', [ErrorOffset, AnsiString(Error)]));
- FCompiled := True
-end;
-
-
-constructor TPerlRegEx.Create;
-begin
- inherited Create;
- FState := [preNotEmpty];
- chartable := pcre_maketables;
-end;
-
-destructor TPerlRegEx.Destroy;
-begin
- CleanUp;
- inherited Destroy;
-end;
-
-class function TPerlRegEx.EscapeRegExChars(const S: string): string;
-var
- I: Integer;
-begin
- Result := S;
- I := Length(Result);
- while I > 0 do begin
- if Result[I] in ['.', '[', ']', '(', ')', '?', '*', '+', '{', '}', '^', '$', '|', '\'] then
- Insert('\', Result, I)
- else if Result[I] = #0 then begin
- Result[I] := '0';
- Insert('\', Result, I);
- end;
- Dec(I);
- end;
-end;
-
-function TPerlRegEx.GetFoundMatch: Boolean;
-begin
- Result := OffsetCount > 0;
-end;
-
-function TPerlRegEx.GetMatchedExpression: string;
-begin
- Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
- Result := GetSubExpressions(0);
-end;
-
-function TPerlRegEx.GetMatchedExpressionLength: Integer;
-begin
- Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
- Result := GetSubExpressionLengths(0)
-end;
-
-function TPerlRegEx.GetMatchedExpressionOffset: Integer;
-begin
- Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
- Result := GetSubExpressionOffsets(0)
-end;
-
-function TPerlRegEx.GetSubExpressionCount: Integer;
-begin
- Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
- Result := OffsetCount-1
-end;
-
-function TPerlRegEx.GetSubExpressionLengths(Index: Integer): Integer;
-begin
- Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
- Assert((Index >= 0) and (Index <= SubExpressionCount), 'REQUIRE: Index <= SubExpressionCount');
- Result := Offsets[Index*2+1]-Offsets[Index*2]
-end;
-
-function TPerlRegEx.GetSubExpressionOffsets(Index: Integer): Integer;
-begin
- Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
- Assert((Index >= 0) and (Index <= SubExpressionCount), 'REQUIRE: Index <= SubExpressionCount');
- Result := Offsets[Index*2]
-end;
-
-function TPerlRegEx.GetSubExpressions(Index: Integer): string;
-begin
- Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
- if Index > SubExpressionCount then Result := ''
- else Result := Copy(FSubject, Offsets[Index*2], Offsets[Index*2+1]-Offsets[Index*2]);
-end;
-
-function TPerlRegEx.Match: Boolean;
-var
- I, Opts: Integer;
-begin
- if not Compiled then Compile;
- if preNotBOL in State then Opts := PCRE_NOTBOL else Opts := 0;
- if preNotEOL in State then Opts := Opts or PCRE_NOTEOL;
- if preNotEmpty in State then Opts := Opts or PCRE_NOTEMPTY;
- if FStart > FStop then OffsetCount := -1
- else OffsetCount := pcre_exec(Pattern, Hints, FSubjectPChar, FStop, 0, Opts, @Offsets[0], High(Offsets));
- Result := OffsetCount > 0;
- // Convert offsets into string indices
- if Result then begin
- for I := 0 to OffsetCount*2-1 do
- Inc(Offsets[I]);
- FStart := Offsets[1];
- if Offsets[0] = Offsets[1] then Inc(FStart); // Make sure we don't get stuck at the same position
- end;
-end;
-
-function TPerlRegEx.MatchAgain: Boolean;
-var
- I, Opts: Integer;
-begin
- if not Compiled then Compile;
- if preNotBOL in State then Opts := PCRE_NOTBOL else Opts := 0;
- if preNotEOL in State then Opts := Opts or PCRE_NOTEOL;
- if preNotEmpty in State then Opts := Opts or PCRE_NOTEMPTY;
- if FStart > FStop then OffsetCount := -1
- else OffsetCount := pcre_exec(Pattern, Hints, FSubjectPChar, FStop, FStart-1, Opts, @Offsets[0], High(Offsets));
- Result := OffsetCount > 0;
- // Convert offsets into string indices
- if Result then begin
- for I := 0 to OffsetCount*2-1 do
- Inc(Offsets[I]);
- FStart := Offsets[1];
- if Offsets[0] = Offsets[1] then Inc(FStart); // Make sure we don't get stuck at the same position
- end;
-end;
-
-function TPerlRegEx.NamedSubExpression(const SEName: string): Integer;
-begin
- Result := pcre_get_stringnumber(Pattern, PChar(SEName));
-end;
-
-function TPerlRegEx.Config(What: integer):integer;
-begin
- result:=-1;
- pcre_config(what, @result);
-end;
-
-
-procedure TPerlRegEx.SetOptions(Value: TPerlRegExOptions);
-begin
- if (FOptions <> Value) then begin
- FOptions := Value;
- pcreOptions := 0;
- if (preCaseLess in Value) then pcreOptions := pcreOptions or PCRE_CASELESS;
- if (preMultiLine in Value) then pcreOptions := pcreOptions or PCRE_MULTILINE;
- if (preSingleLine in Value) then pcreOptions := pcreOptions or PCRE_SINGLELINE;
- if (preExtended in Value) then pcreOptions := pcreOptions or PCRE_EXTENDED;
- if (preAnchored in Value) then pcreOptions := pcreOptions or PCRE_ANCHORED;
- if (preDollarEndOnly in Value) then pcreOptions := pcreOptions or PCRE_DOLLAR_ENDONLY;
- if (preExtra in Value) then pcreOptions := pcreOptions or PCRE_EXTRA;
- if (preUnGreedy in Value) then pcreOptions := pcreOptions or PCRE_UNGREEDY;
- if (preUTF8 in Value) then pcreOptions := pcreOptions or PCRE_UTF8;
- CleanUp
- end
-end;
-
-procedure TPerlRegEx.SetRegEx(const Value: string);
-begin
- if FRegEx <> Value then begin
- FRegEx := Value;
- CleanUp
- end
-end;
-
-procedure TPerlRegEx.SetStart(const Value: Integer);
-begin
- if Value < 1 then FStart := 1
- else FStart := Value;
- // If FStart > Length(Subject), MatchAgain() will simply return False
-end;
-
-procedure TPerlRegEx.SetStop(const Value: Integer);
-begin
- if Value > Length(Subject) then FStop := Length(Subject)
- else FStop := Value;
-end;
-
-procedure TPerlRegEx.SetSubject(const Value: string);
-begin
- FSubject := Value;
- FSubjectPChar := PChar(Value);
- FStart := 1;
- FStop := Length(Subject);
- OffsetCount := 0;
-end;
-
-
-
-procedure TPerlRegEx.Study;
-var
- Error: PChar;
-begin
- if not FCompiled then Compile;
- Hints := pcre_study(Pattern, 0, @Error);
- if Error <> nil then raise Exception.Create('TPerlRegEx.Study() - Error studying the regex: ' + AnsiString(Error));
- FStudied := True
-end;
-
-
-end.
\ No newline at end of file |