//------------------------------------------------------------------------------
// <copyright file="RegexReplacement.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
//------------------------------------------------------------------------------

// RegexReplacement models the substitution string used by regex
// search/replace operations. Logically it is a sequence that
// intermixes (1) constant strings and (2) group numbers.

namespace System.Text.RegularExpressions {

    using System.Collections;
    using System.Collections.Generic;

    internal sealed class RegexReplacement {
        /*
         * The replacement string goes through the same parser as a Regex,
         * so the constructor is handed a RegexNode. That node must be a
         * Concatenate node whose children are constant text (Multi / One)
         * or backreferences (Ref); anything else raises ArgumentException.
         *
         * Consecutive constant children are merged into a single entry of
         * _strings. Each Ref child becomes a negative entry of _rules,
         * encoded as (-Specials - 1 - slot). Non-negative slots are first
         * translated through _caps when a map is supplied; negative slots
         * are stored untranslated and decode to the special insertion
         * constants (LeftPortion .. WholeString) in ReplacementImpl.
         */
#if SILVERLIGHT
        internal RegexReplacement(String rep, RegexNode concat, Dictionary<Int32, Int32> _caps) {
#else
        internal RegexReplacement(String rep, RegexNode concat, Hashtable _caps) {
#endif
            _rep = rep;

            if (concat.Type() != RegexNode.Concatenate) {
                throw new ArgumentException(SR.GetString(SR.ReplacementError));
            }

            StringBuilder literal = new StringBuilder();
            List<String> constants = new List<String>();
            List<Int32> ruleList = new List<Int32>();

            for (int index = 0; index < concat.ChildCount(); index++) {
                RegexNode node = concat.Child(index);

                switch (node.Type()) {
                    case RegexNode.Multi:
                        literal.Append(node._str);
                        break;

                    case RegexNode.One:
                        literal.Append(node._ch);
                        break;

                    case RegexNode.Ref:
                        // Constant text gathered so far must precede this reference.
                        FlushLiteral(literal, constants, ruleList);

                        int slot = node._m;
                        if (slot >= 0 && _caps != null) {
                            slot = (int)_caps[slot];
                        }

                        ruleList.Add(-Specials - 1 - slot);
                        break;

                    default:
                        throw new ArgumentException(SR.GetString(SR.ReplacementError));
                }
            }

            // Trailing constant text after the last reference, if any.
            FlushLiteral(literal, constants, ruleList);

            _strings = constants;
            _rules = ruleList;
        }

        /*
         * If any constant text is pending, records it as a new string
         * constant together with a rule that points at it, then empties
         * the pending buffer. Does nothing when the buffer is empty.
         */
        private static void FlushLiteral(StringBuilder pending, List<String> constants, List<Int32> ruleList) {
            if (pending.Length == 0) {
                return;
            }

            ruleList.Add(constants.Count);
            constants.Add(pending.ToString());
            pending.Length = 0;
        }

        internal String _rep;
        internal List<String> _strings;     // table of string constants
        internal List<Int32> _rules;        // negative -> group #, positive -> string #

        // constants for special insertion patterns

        internal const int Specials = 4;
        internal const int LeftPortion = -1;
        internal const int RightPortion = -2;
        internal const int LastGroup = -3;
        internal const int WholeString = -4;

        /*
         * Evaluates the substitution pattern against a Match and appends
         * the resulting text to the StringBuilder, rule by rule in
         * left-to-right order.
         */
        private void ReplacementImpl(StringBuilder sb, Match match) {
            foreach (int rule in _rules) {
                if (rule >= 0) {
                    // string lookup
                    sb.Append(_strings[rule]);
                    continue;
                }

                // Undo the (-Specials - 1 - slot) encoding applied by the constructor.
                int target = -Specials - 1 - rule;

                if (target >= 0) {
                    // group lookup
                    sb.Append(match.GroupToStringImpl(target));
                    continue;
                }

                // special insertion patterns
                switch (target) {
                    case LeftPortion:
                        sb.Append(match.GetLeftSubstring());
                        break;
                    case RightPortion:
                        sb.Append(match.GetRightSubstring());
                        break;
                    case LastGroup:
                        sb.Append(match.LastGroupToStringImpl());
                        break;
                    case WholeString:
                        sb.Append(match.GetOriginalString());
                        break;
                }
            }
        }

        /*
         * Right-to-left counterpart of ReplacementImpl: evaluates the
         * substitution pattern against a Match and adds each piece to the
         * List<String>, walking the rules from last to first.
         */
        private void ReplacementImplRTL(List<String> al, Match match) {
            int index = _rules.Count;

            while (--index >= 0) {
                int rule = _rules[index];

                if (rule >= 0) {
                    // string lookup
                    al.Add(_strings[rule]);
                    continue;
                }

                // Undo the (-Specials - 1 - slot) encoding applied by the constructor.
                int target = -Specials - 1 - rule;

                if (target >= 0) {
                    // group lookup
                    al.Add(match.GroupToStringImpl(target));
                    continue;
                }

                // special insertion patterns
                switch (target) {
                    case LeftPortion:
                        al.Add(match.GetLeftSubstring());
                        break;
                    case RightPortion:
                        al.Add(match.GetRightSubstring());
                        break;
                    case LastGroup:
                        al.Add(match.LastGroupToStringImpl());
                        break;
                    case WholeString:
                        al.Add(match.GetOriginalString());
                        break;
                }
            }
        }

        /*
         * The original replacement pattern string, exactly as passed to
         * the constructor.
         */
        internal String Pattern {
            get {
                return _rep;
            }
        }

        /*
         * Returns the evaluated replacement text for a single match.
         */
        internal String Replacement(Match match) {
            StringBuilder output = new StringBuilder();
            ReplacementImpl(output, match);
            return output.ToString();
        }

        /*
         * Three very similar algorithms appear below: replace (pattern),
         * replace (evaluator), and split.
         */


        /*
         * Replaces occurrences of the regex in the input with this
         * replacement pattern, starting the search at startat.
         *
         * count limits the number of replacements; -1 means no limit and
         * 0 returns the input untouched. Values below -1, and a startat
         * outside [0, input.Length], raise ArgumentOutOfRangeException.
         *
         * When nothing matches, the input string itself is returned.
         * The right-to-left case collects its pieces in a list and
         * assembles them in reverse at the end, because StringBuilder
         * does not build strings back-to-front well.
         */
        internal String Replace(Regex regex, String input, int count, int startat) {
            if (count < -1) {
                throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));
            }
            if (startat < 0 || startat > input.Length) {
                throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));
            }

            if (count == 0) {
                return input;
            }

            Match match = regex.Match(input, startat);
            if (!match.Success) {
                return input;
            }

            StringBuilder result;

            if (!regex.RightToLeft) {
                result = new StringBuilder();
                int cursor = 0;     // end of the previous match

                for (;;) {
                    int start = match.Index;

                    // Copy the untouched text between the previous match and this one.
                    if (start != cursor) {
                        result.Append(input, cursor, start - cursor);
                    }

                    cursor = start + match.Length;
                    ReplacementImpl(result, match);

                    if (--count == 0) {
                        break;
                    }

                    match = match.NextMatch();
                    if (!match.Success) {
                        break;
                    }
                }

                // Copy whatever follows the last replaced match.
                if (cursor < input.Length) {
                    result.Append(input, cursor, input.Length - cursor);
                }
            }
            else {
                List<String> pieces = new List<String>();
                int cursor = input.Length;  // start of the previous (further right) match

                for (;;) {
                    int end = match.Index + match.Length;

                    // Keep the untouched text between this match and the previous one.
                    if (end != cursor) {
                        pieces.Add(input.Substring(end, cursor - end));
                    }

                    cursor = match.Index;
                    ReplacementImplRTL(pieces, match);

                    if (--count == 0) {
                        break;
                    }

                    match = match.NextMatch();
                    if (!match.Success) {
                        break;
                    }
                }

                result = new StringBuilder();

                // Text to the left of the last processed match comes first.
                if (cursor > 0) {
                    result.Append(input, 0, cursor);
                }

                // Pieces were gathered right-to-left; emit them in reverse.
                int index = pieces.Count - 1;
                while (index >= 0) {
                    result.Append(pieces[index]);
                    index--;
                }
            }

            return result.ToString();
        }

        /*
         * Replaces occurrences of the regex in the input with the text
         * produced by the replacement evaluator, starting the search at
         * startat.
         *
         * A null evaluator raises ArgumentNullException. count limits the
         * number of replacements; -1 means no limit and 0 returns the
         * input untouched. Values below -1, and a startat outside
         * [0, input.Length], raise ArgumentOutOfRangeException.
         *
         * When nothing matches, the input string itself is returned.
         * The right-to-left case collects its pieces in a list and
         * assembles them in reverse at the end, because StringBuilder
         * does not build strings back-to-front well.
         */
        internal static String Replace(MatchEvaluator evaluator, Regex regex,
                                       String input, int count, int startat) {
            if (evaluator == null) {
                throw new ArgumentNullException("evaluator");
            }
            if (count < -1) {
                throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));
            }
            if (startat < 0 || startat > input.Length) {
                throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));
            }

            if (count == 0) {
                return input;
            }

            Match match = regex.Match(input, startat);
            if (!match.Success) {
                return input;
            }

            StringBuilder result;

            if (!regex.RightToLeft) {
                result = new StringBuilder();
                int cursor = 0;     // end of the previous match

                for (;;) {
                    int start = match.Index;

                    // Copy the untouched text between the previous match and this one.
                    if (start != cursor) {
                        result.Append(input, cursor, start - cursor);
                    }

                    cursor = start + match.Length;

                    result.Append(evaluator(match));

                    if (--count == 0) {
                        break;
                    }

                    match = match.NextMatch();
                    if (!match.Success) {
                        break;
                    }
                }

                // Copy whatever follows the last replaced match.
                if (cursor < input.Length) {
                    result.Append(input, cursor, input.Length - cursor);
                }
            }
            else {
                List<String> pieces = new List<String>();
                int cursor = input.Length;  // start of the previous (further right) match

                for (;;) {
                    int end = match.Index + match.Length;

                    // Keep the untouched text between this match and the previous one.
                    if (end != cursor) {
                        pieces.Add(input.Substring(end, cursor - end));
                    }

                    cursor = match.Index;

                    pieces.Add(evaluator(match));

                    if (--count == 0) {
                        break;
                    }

                    match = match.NextMatch();
                    if (!match.Success) {
                        break;
                    }
                }

                result = new StringBuilder();

                // Text to the left of the last processed match comes first.
                if (cursor > 0) {
                    result.Append(input, 0, cursor);
                }

                // Pieces were gathered right-to-left; emit them in reverse.
                int index = pieces.Count - 1;
                while (index >= 0) {
                    result.Append(pieces[index]);
                    index--;
                }
            }

            return result.ToString();
        }

        /*
         * Splits the input around matches of the regex, starting the
         * search at startat. The text of every matched capture group
         * (group 1 and up) is added to the output after the segment that
         * precedes its match.
         *
         * count caps the number of segments: 1 returns the whole input as
         * the only element, and 0 imposes no limit. A negative count, or a
         * startat outside [0, input.Length], raises
         * ArgumentOutOfRangeException.
         *
         * In the right-to-left case the pieces are gathered back-to-front
         * and the list is reversed so the result reads forwards.
         */
        internal static String[] Split(Regex regex, String input, int count, int startat) {
            if (count < 0) {
                throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));
            }

            if (startat < 0 || startat > input.Length) {
                throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));
            }

            if (count == 1) {
                return new String[] { input };
            }

            // From here on count is the number of matches still allowed to
            // split on; a caller-supplied 0 becomes -1 and never reaches zero.
            count -= 1;

            Match match = regex.Match(input, startat);
            if (!match.Success) {
                return new String[] { input };
            }

            List<String> segments = new List<String>();

            if (!regex.RightToLeft) {
                int cursor = 0;     // end of the previous match

                do {
                    segments.Add(input.Substring(cursor, match.Index - cursor));
                    cursor = match.Index + match.Length;

                    AddCaptureGroups(segments, match);

                    if (--count == 0) {
                        break;
                    }

                    match = match.NextMatch();
                } while (match.Success);

                segments.Add(input.Substring(cursor, input.Length - cursor));
            }
            else {
                int cursor = input.Length;  // start of the previous (further right) match

                do {
                    int end = match.Index + match.Length;
                    segments.Add(input.Substring(end, cursor - end));
                    cursor = match.Index;

                    AddCaptureGroups(segments, match);

                    if (--count == 0) {
                        break;
                    }

                    match = match.NextMatch();
                } while (match.Success);

                segments.Add(input.Substring(0, cursor));

                segments.Reverse(0, segments.Count);
            }

            return segments.ToArray();
        }

        /*
         * Adds the text of every matched capture group of the match,
         * from group 1 upwards, to the list. Group 0 is skipped.
         */
        private static void AddCaptureGroups(List<String> segments, Match match) {
            for (int group = 1; group < match.Groups.Count; group++) {
                if (match.IsMatched(group)) {
                    segments.Add(match.Groups[group].ToString());
                }
            }
        }
    }

}