1 //------------------------------------------------------------------------------
2 // <copyright file="RegexReplacement.cs" company="Microsoft">
3 //     Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>
5 //------------------------------------------------------------------------------
6 
// The RegexReplacement class represents a substitution string for
// use when using regexes to search/replace, etc. It's logically
// a sequence of intermixed (1) constant strings and (2) group numbers.
10 
11 namespace System.Text.RegularExpressions {
12 
13     using System.Collections;
14     using System.Collections.Generic;
15 
16     internal sealed class RegexReplacement {
17         /*
18          * Since RegexReplacement shares the same parser as Regex,
19          * the constructor takes a RegexNode which is a concatenation
20          * of constant strings and backreferences.
21          */
22 #if SILVERLIGHT
RegexReplacement(String rep, RegexNode concat, Dictionary<Int32, Int32> _caps)23         internal RegexReplacement(String rep, RegexNode concat, Dictionary<Int32, Int32> _caps) {
24 #else
25         internal RegexReplacement(String rep, RegexNode concat, Hashtable _caps) {
26 #endif
27             StringBuilder sb;
28             List<String> strings;
29             List<Int32> rules;
30             int slot;
31 
32             _rep = rep;
33 
34             if (concat.Type() != RegexNode.Concatenate)
35                 throw new ArgumentException(SR.GetString(SR.ReplacementError));
36 
37             sb = new StringBuilder();
38             strings = new List<String>();
39             rules = new List<Int32>();
40 
41             for (int i = 0; i < concat.ChildCount(); i++) {
42                 RegexNode child = concat.Child(i);
43 
44                 switch (child.Type()) {
45                     case RegexNode.Multi:
46                         sb.Append(child._str);
47                         break;
48                     case RegexNode.One:
49                         sb.Append(child._ch);
50                         break;
51                     case RegexNode.Ref:
52                         if (sb.Length > 0) {
53                             rules.Add(strings.Count);
54                             strings.Add(sb.ToString());
55                             sb.Length = 0;
56                         }
57                         slot = child._m;
58 
59                         if (_caps != null && slot >= 0)
60                             slot = (int)_caps[slot];
61 
62                         rules.Add(-Specials - 1 - slot);
63                         break;
64                     default:
65                         throw new ArgumentException(SR.GetString(SR.ReplacementError));
66                 }
67             }
68 
69             if (sb.Length > 0) {
70                 rules.Add(strings.Count);
71                 strings.Add(sb.ToString());
72             }
73 
74             _strings = strings;
75             _rules = rules;
76         }
77 
78         internal String _rep;
79         internal List<String>  _strings;          // table of string constants
80         internal List<Int32>  _rules;            // negative -> group #, positive -> string #
81 
82         // constants for special insertion patterns
83 
84         internal const int Specials       = 4;
85         internal const int LeftPortion    = -1;
86         internal const int RightPortion   = -2;
87         internal const int LastGroup      = -3;
88         internal const int WholeString    = -4;
89 
90         /*
91          * Given a Match, emits into the StringBuilder the evaluated
92          * substitution pattern.
93          */
94         private void ReplacementImpl(StringBuilder sb, Match match) {
95             for (int i = 0; i < _rules.Count; i++) {
96                 int r = _rules[i];
97                 if (r >= 0)   // string lookup
98                     sb.Append(_strings[r]);
99                 else if (r < -Specials) // group lookup
100                     sb.Append(match.GroupToStringImpl(-Specials - 1 - r));
101                 else {
102                     switch (-Specials - 1 - r) { // special insertion patterns
103                         case LeftPortion:
104                             sb.Append(match.GetLeftSubstring());
105                             break;
106                         case RightPortion:
107                             sb.Append(match.GetRightSubstring());
108                             break;
109                         case LastGroup:
110                             sb.Append(match.LastGroupToStringImpl());
111                             break;
112                         case WholeString:
113                             sb.Append(match.GetOriginalString());
114                             break;
115                     }
116                 }
117             }
118         }
119 
120         /*
121          * Given a Match, emits into the List<String> the evaluated
122          * Right-to-Left substitution pattern.
123          */
124         private void ReplacementImplRTL(List<String> al, Match match) {
125             for (int i = _rules.Count - 1; i >= 0; i--) {
126                 int r = _rules[i];
127                 if (r >= 0)  // string lookup
128                     al.Add(_strings[r]);
129                 else if (r < -Specials) // group lookup
130                     al.Add(match.GroupToStringImpl(-Specials - 1 - r));
131                 else {
132                     switch (-Specials - 1 - r) { // special insertion patterns
133                         case LeftPortion:
134                             al.Add(match.GetLeftSubstring());
135                             break;
136                         case RightPortion:
137                             al.Add(match.GetRightSubstring());
138                             break;
139                         case LastGroup:
140                             al.Add(match.LastGroupToStringImpl());
141                             break;
142                         case WholeString:
143                             al.Add(match.GetOriginalString());
144                             break;
145                     }
146                 }
147             }
148         }
149 
150         /*
151          * The original pattern string
152          */
153         internal String Pattern {
154             get {
155                 return _rep;
156             }
157         }
158 
159         /*
160          * Returns the replacement result for a single match
161          */
162         internal String Replacement(Match match) {
163             StringBuilder sb = new StringBuilder();
164 
165             ReplacementImpl(sb, match);
166 
167             return sb.ToString();
168         }
169 
170         /*
171          * Three very similar algorithms appear below: replace (pattern),
172          * replace (evaluator), and split.
173          */
174 
175 
176         /*
177          * Replaces all ocurrances of the regex in the string with the
178          * replacement pattern.
179          *
180          * Note that the special case of no matches is handled on its own:
181          * with no matches, the input string is returned unchanged.
182          * The right-to-left case is split out because StringBuilder
183          * doesn't handle right-to-left string building directly very well.
184          */
185         internal String Replace(Regex regex, String input, int count, int startat) {
186             Match match;
187 
188             if (count < -1)
189                 throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));
190             if (startat < 0 || startat > input.Length)
191                 throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));
192 
193             if (count == 0)
194                 return input;
195 
196             match = regex.Match(input, startat);
197             if (!match.Success) {
198                 return input;
199             }
200             else {
201                 StringBuilder sb;
202 
203                 if (!regex.RightToLeft) {
204                     sb = new StringBuilder();
205                     int prevat = 0;
206 
207                     do {
208                         if (match.Index != prevat)
209                             sb.Append(input, prevat, match.Index - prevat);
210 
211                         prevat = match.Index + match.Length;
212                         ReplacementImpl(sb, match);
213                         if (--count == 0)
214                             break;
215 
216                         match = match.NextMatch();
217                     } while (match.Success);
218 
219                     if (prevat < input.Length)
220                         sb.Append(input, prevat, input.Length - prevat);
221                 }
222                 else {
223                     List<String> al = new List<String>();
224                     int prevat = input.Length;
225 
226                     do {
227                         if (match.Index + match.Length != prevat)
228                             al.Add(input.Substring(match.Index + match.Length, prevat - match.Index - match.Length));
229 
230                         prevat = match.Index;
231                         ReplacementImplRTL(al, match);
232                         if (--count == 0)
233                             break;
234 
235                         match = match.NextMatch();
236                     } while (match.Success);
237 
238                     sb = new StringBuilder();
239 
240                     if (prevat > 0)
241                         sb.Append(input, 0, prevat);
242 
243                     for (int i = al.Count - 1; i >= 0; i--) {
244                         sb.Append(al[i]);
245                     }
246                 }
247 
248                 return sb.ToString();
249             }
250         }
251 
252         /*
253          * Replaces all ocurrances of the regex in the string with the
254          * replacement evaluator.
255          *
256          * Note that the special case of no matches is handled on its own:
257          * with no matches, the input string is returned unchanged.
258          * The right-to-left case is split out because StringBuilder
259          * doesn't handle right-to-left string building directly very well.
260          */
261         internal static String Replace(MatchEvaluator evaluator, Regex regex,
262                                        String input, int count, int startat) {
263             Match match;
264 
265             if (evaluator == null)
266                 throw new ArgumentNullException("evaluator");
267             if (count < -1)
268                 throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));
269             if (startat < 0 || startat > input.Length)
270                 throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));
271 
272             if (count == 0)
273                 return input;
274 
275             match = regex.Match(input, startat);
276 
277             if (!match.Success) {
278                 return input;
279             }
280             else {
281                 StringBuilder sb;
282 
283                 if (!regex.RightToLeft) {
284                     sb = new StringBuilder();
285                     int prevat = 0;
286 
287                     do {
288                         if (match.Index != prevat)
289                             sb.Append(input, prevat, match.Index - prevat);
290 
291                         prevat = match.Index + match.Length;
292 
293                         sb.Append(evaluator(match));
294 
295                         if (--count == 0)
296                             break;
297 
298                         match = match.NextMatch();
299                     } while (match.Success);
300 
301                     if (prevat < input.Length)
302                         sb.Append(input, prevat, input.Length - prevat);
303                 }
304                 else {
305                     List<String> al = new List<String>();
306                     int prevat = input.Length;
307 
308                     do {
309                         if (match.Index + match.Length != prevat)
310                             al.Add(input.Substring(match.Index + match.Length, prevat - match.Index - match.Length));
311 
312                         prevat = match.Index;
313 
314                         al.Add(evaluator(match));
315 
316                         if (--count == 0)
317                             break;
318 
319                         match = match.NextMatch();
320                     } while (match.Success);
321 
322                     sb = new StringBuilder();
323 
324                     if (prevat > 0)
325                         sb.Append(input, 0, prevat);
326 
327                     for (int i = al.Count - 1; i >= 0; i--) {
328                         sb.Append(al[i]);
329                     }
330                 }
331 
332                 return sb.ToString();
333             }
334         }
335 
336         /*
337          * Does a split. In the right-to-left case we reorder the
338          * array to be forwards.
339          */
340         internal static String[] Split(Regex regex, String input, int count, int startat) {
341             Match match;
342             String[] result;
343 
344             if (count < 0)
345                 throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));
346 
347             if (startat < 0 || startat > input.Length)
348                 throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));
349 
350             if (count == 1) {
351                 result = new String[1];
352                 result[0] = input;
353                 return result;
354             }
355 
356             count -= 1;
357 
358             match = regex.Match(input, startat);
359 
360             if (!match.Success) {
361                 result = new String[1];
362                 result[0] = input;
363                 return result;
364             }
365             else {
366                 List<String> al = new List<String>();
367 
368                 if (!regex.RightToLeft) {
369                     int prevat = 0;
370 
371                     for (;;) {
372                         al.Add(input.Substring(prevat, match.Index - prevat));
373 
374                         prevat = match.Index + match.Length;
375 
376                         // add all matched capture groups to the list.
377                         for (int i=1; i<match.Groups.Count; i++) {
378                             if (match.IsMatched(i))
379                                 al.Add(match.Groups[i].ToString());
380                         }
381 
382                         if (--count == 0)
383                             break;
384 
385                         match = match.NextMatch();
386 
387                         if (!match.Success)
388                             break;
389                     }
390 
391                     al.Add(input.Substring(prevat, input.Length - prevat));
392                 }
393                 else {
394                     int prevat = input.Length;
395 
396                     for (;;) {
397                         al.Add(input.Substring(match.Index + match.Length, prevat - match.Index - match.Length));
398 
399                         prevat = match.Index;
400 
401                         // add all matched capture groups to the list.
402                         for (int i=1; i<match.Groups.Count; i++) {
403                             if (match.IsMatched(i))
404                                 al.Add(match.Groups[i].ToString());
405                         }
406 
407                         if (--count == 0)
408                             break;
409 
410                         match = match.NextMatch();
411 
412                         if (!match.Success)
413                             break;
414                     }
415 
416                     al.Add(input.Substring(0, prevat));
417 
418                     al.Reverse(0, al.Count);
419                 }
420 
421                 return al.ToArray();
422             }
423         }
424     }
425 
426 }
427