plugins/HistoryPlusPlus/hpp_strparser.pas


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176

(*
    History++ plugin for Miranda IM: the free IM client for Microsoft* Windows*

    Copyright (C) 2006-2009 theMIROn, 2003-2006 Art Fedorov.
    History+ parts (C) 2001 Christian Kastner

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*)

{-----------------------------------------------------------------------------
 hpp_strparser.pas (historypp project)

 Version:   1.5
 Created:   18.04.2006
 Author:    Oxygen

 [ Description ]

 This unit provides AnsiString parsing routines. Mainly it was added to
 parse tokens from the AnsiString. See TokenizeString for description.

 [ Modifications ]
 none

 [ Known Issues ]
 none

 Contributors: Art Fedorov
-----------------------------------------------------------------------------}

unit hpp_strparser;

interface

uses
  hpp_global;

// Splits Template into consecutive tokens (returned in Tokens) and stores in
// SpecialTokens the indexes, within Tokens, of the '\x' and '%...%' tokens.
// Both output arrays are reset to empty before parsing starts.
procedure TokenizeString(const Template: String; var Tokens: TWideStrArray; var SpecialTokens: TIntArray);

implementation

{
  This procedure splits AnsiString into array.

  The split is based on three token types:
  1) general text
  2) chars prefixed with '\', like '\n'
  3) AnsiString surrounded by %'s, like '%mymom%'

  You input the AnsiString in Template variable and it outputs
  * Tokens: array of all tokens
  * SpecialTokens: array of token indexes from the Tokens array,
    where indexes are of 2nd and 3rd type tokens

  You can get the original template AnsiString if you combine all strings
  from tokens array. It means that Template = Tokens[0]+Tokens[1]+...+Tokens[n]

  The idea is that after receiving special tokens array, you can scan through
  them and change all the special tokens you want in the tokens array and then
  combine tokens array to get template with the needed tokens substituted

  *** Examples (special tokens in double quotes here):
     'My %mom% is good\not bad' -> 'My '+"%mom%"+' is good'+"\n"+'ot bad'
     '%My mom% is good' -> "%My mom%"+' is good'
  *** Placing \'s inside %'s would give you type 2 token, not type 3:
     '%My \mom% is good' -> '%My '+"\m"+'om% is good'
  *** \'s and %'s at the end of the line don't get counted:
     'My mom\' -> 'My mom\'
     'My mom%' -> 'My mom%'
     'My mom is %good' -> 'My mom is %good'
  *** But
     'My mom is %good%' -> 'My mom is '+"%good%"
  *** Double %'s is also counted as token:
     'My %% mom' -> 'My '+"%%"+' mom'

  So, feeding it 'My %mom% is good\nNot bad' would output:
  Tokens =>
    [0] -> 'My '
    [1] -> '%mom%'
    [2] -> ' is good'
    [3] -> '\n'
    [4] -> 'Not bad'
  SpecialTokens =>
    [0] -> 1
    [1] -> 3
}
procedure TokenizeString(const Template: String; var Tokens: TWideStrArray; var SpecialTokens: TIntArray);
var
  Cursor, TemplateLen: Integer;
  SegStart: Integer;      // index of the first char not yet emitted as a token
  PercentOpen: Boolean;   // True while an opening '%' waits for its closing '%'

  // True when the most recently stored token is registered in SpecialTokens.
  function LastTokenIsSpecial: Boolean;
  begin
    Result := (Length(SpecialTokens) > 0) and
      (SpecialTokens[High(SpecialTokens)] = High(Tokens));
  end;

  // Emits Template[FirstIdx..LastIdx] as a token; empty ranges are ignored.
  // Plain text is glued onto the previous token when that one is plain too,
  // so two neighbouring plain tokens never appear in the output.
  procedure AddToken(FirstIdx, LastIdx: Integer; IsSpecial: Boolean);
  var
    Piece: String;
  begin
    if LastIdx < FirstIdx then
      exit;
    Piece := Copy(Template, FirstIdx, LastIdx - FirstIdx + 1);

    if (not IsSpecial) and (Length(Tokens) > 0) and (not LastTokenIsSpecial) then
    begin
      Tokens[High(Tokens)] := Tokens[High(Tokens)] + Piece;
      exit;
    end;

    SetLength(Tokens, Length(Tokens) + 1);
    Tokens[High(Tokens)] := Piece;
    if IsSpecial then
    begin
      SetLength(SpecialTokens, Length(SpecialTokens) + 1);
      SpecialTokens[High(SpecialTokens)] := High(Tokens);
    end;
  end;

begin
  TemplateLen := Length(Template);
  SetLength(Tokens, 0);
  SetLength(SpecialTokens, 0);

  SegStart := 1;
  PercentOpen := False;
  Cursor := 1;
  while Cursor <= TemplateLen do
  begin
    case Template[Cursor] of
      '\':
        begin
          // a trailing backslash has nothing to escape: leave it as plain text
          if Cursor = TemplateLen then
            break;
          // flush pending text (including an unfinished '%...' run) as plain,
          // then emit the backslash together with the char that follows it
          AddToken(SegStart, Cursor - 1, False);
          AddToken(Cursor, Cursor + 1, True);
          SegStart := Cursor + 2;
          PercentOpen := False;
          Cursor := SegStart;
        end;
      '%':
        begin
          if PercentOpen then
          begin
            // closing '%': everything from the opening '%' up to here is special
            AddToken(SegStart, Cursor, True);
            SegStart := Cursor + 1;
          end
          else
          begin
            // opening '%': flush the plain text in front of it
            AddToken(SegStart, Cursor - 1, False);
            SegStart := Cursor;
          end;
          PercentOpen := not PercentOpen;
          Inc(Cursor);
        end;
    else
      Inc(Cursor);
    end;
  end;

  // whatever is left (plain text, unmatched '%...', trailing '\') is plain
  AddToken(SegStart, TemplateLen, False);
end;

end.