1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 2 * Use of this file is governed by the BSD 3-clause license that 3 * can be found in the LICENSE.txt file in the project root. 4 */ 5 6 7 /// 8 /// Useful for rewriting out a buffered input token stream after doing some 9 /// augmentation or other manipulations on it. 10 /// 11 /// 12 /// You can insert stuff, replace, and delete chunks. Note that the operations 13 /// are done lazily--only if you convert the buffer to a _String_ with 14 /// _org.antlr.v4.runtime.TokenStream#getText()_. This is very efficient because you are not 15 /// moving data around all the time. As the buffer of tokens is converted to 16 /// strings, the _#getText()_ method(s) scan the input token stream and 17 /// check to see if there is an operation at the current index. If so, the 18 /// operation is done and then normal _String_ rendering continues on the 19 /// buffer. This is like having multiple Turing machine instruction streams 20 /// (programs) operating on a single input tape. :) 21 /// 22 /// 23 /// This rewriter makes no modifications to the token stream. It does not ask the 24 /// stream to fill itself up nor does it advance the input cursor. The token 25 /// stream _org.antlr.v4.runtime.TokenStream#index()_ will return the same value before and 26 /// after any _#getText()_ call. 27 /// 28 /// 29 /// The rewriter only works on tokens that you have in the buffer and ignores the 30 /// current input cursor. If you are buffering tokens on-demand, calling 31 /// _#getText()_ halfway through the input will only do rewrites for those 32 /// tokens in the first half of the file. 33 /// 34 /// 35 /// Since the operations are done lazily at _#getText_-time, operations do 36 /// not screw up the token index values. That is, an insert operation at token 37 /// index `i` does not change the index values for tokens 38 /// `i`+1..n-1. 39 /// 40 /// 41 /// Because operations never actually alter the buffer, you may always get the 42 /// original token stream back without undoing anything. Since the instructions 43 /// are queued up, you can easily simulate transactions and roll back any changes 44 /// if there is an error just by removing instructions. For example, 45 /// 46 /// 47 /// CharStream input = new ANTLRFileStream("input"); 48 /// TLexer lex = new TLexer(input); 49 /// CommonTokenStream tokens = new CommonTokenStream(lex); 50 /// T parser = new T(tokens); 51 /// TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens); 52 /// parser.startRule(); 53 /// 54 /// 55 /// 56 /// Then in the rules, you can execute (assuming rewriter is visible): 57 /// 58 /// 59 /// Token t,u; 60 /// ... 61 /// rewriter.insertAfter(t, "text to put after t");} 62 /// rewriter.insertAfter(u, "text after u");} 63 /// System.out.println(rewriter.getText()); 64 /// 65 /// 66 /// 67 /// You can also have multiple "instruction streams" and get multiple rewrites 68 /// from a single pass over the input. Just name the instruction streams and use 69 /// that name again when printing the buffer. This could be useful for generating 70 /// a C file and also its header file--all from the same buffer: 71 /// 72 /// 73 /// rewriter.insertAfter("pass1", t, "text to put after t");} 74 /// rewriter.insertAfter("pass2", u, "text after u");} 75 /// System.out.println(rewriter.getText("pass1")); 76 /// System.out.println(rewriter.getText("pass2")); 77 /// 78 /// 79 /// 80 /// If you don't use named rewrite streams, a "default" stream is used as the 81 /// first example shows. 82 /// 83 84 import Foundation 85 86 public class TokenStreamRewriter { 87 public let DEFAULT_PROGRAM_NAME = "default" 88 public static let PROGRAM_INIT_SIZE = 100 89 public static let MIN_TOKEN_INDEX = 0 90 91 // Define the rewrite operation hierarchy 92 public class RewriteOperation: CustomStringConvertible { 93 /// What index into rewrites List are we? 94 internal var instructionIndex = 0 95 /// Token buffer index. 96 internal var index: Int 97 internal var text: String? 98 internal var lastIndex = 0 99 internal weak var tokens: TokenStream! 100 101 init(_ index: Int, _ tokens: TokenStream) { 102 self.index = index 103 self.tokens = tokens 104 } 105 106 init(_ index: Int, _ text: String?, _ tokens: TokenStream) { 107 self.index = index 108 self.text = text 109 self.tokens = tokens 110 } 111 112 /// Execute the rewrite operation by possibly adding to the buffer. 113 /// Return the index of the next token to operate on. 114 /// executenull115 public func execute(_ buf: inout String) throws -> Int { 116 return index 117 } 118 119 public var description: String { 120 let opName = String(describing: type(of: self)) 121 return "<\(opName)@\(try! tokens.get(index)):\"\(text!)\">" 122 } 123 } 124 125 public class InsertBeforeOp: RewriteOperation { executenull126 override public func execute(_ buf: inout String) throws -> Int { 127 if let text = text { 128 buf.append(text) 129 } 130 let token = try tokens.get(index) 131 if token.getType() != CommonToken.EOF { 132 buf.append(token.getText()!) 133 } 134 return index + 1 135 } 136 } 137 138 public class InsertAfterOp: InsertBeforeOp { 139 public override init(_ index: Int, _ text: String?, _ tokens: TokenStream) { 140 super.init(index + 1, text, tokens) 141 } 142 } 143 144 /// I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp 145 /// instructions. 146 /// 147 148 public class ReplaceOp: RewriteOperation { 149 150 public init(_ from: Int, _ to: Int, _ text: String?, _ tokens: TokenStream) { 151 super.init(from, text, tokens) 152 lastIndex = to 153 } 154 155 override executenull156 public func execute(_ buf: inout String) -> Int { 157 if let text = text { 158 buf += text 159 } 160 return lastIndex + 1 161 } 162 163 override 164 public var description: String { 165 let token = try! tokens.get(index) 166 let lastToken = try! tokens.get(lastIndex) 167 if let text = text { 168 return "<ReplaceOp@\(token)..\(lastToken):\"\(text)\">" 169 } 170 return "<DeleteOp@\(token)..\(lastToken)>" 171 } 172 } 173 174 public class RewriteOperationArray{ 175 private final var rewrites = [RewriteOperation?]() 176 177 public init() { 178 rewrites.reserveCapacity(TokenStreamRewriter.PROGRAM_INIT_SIZE) 179 } 180 appendnull181 final func append(_ op: RewriteOperation) { 182 op.instructionIndex = rewrites.count 183 rewrites.append(op) 184 } 185 rollbacknull186 final func rollback(_ instructionIndex: Int) { 187 rewrites = Array(rewrites[TokenStreamRewriter.MIN_TOKEN_INDEX ..< instructionIndex]) 188 } 189 190 final var count: Int { 191 return rewrites.count 192 } 193 194 final var isEmpty: Bool { 195 return rewrites.isEmpty 196 } 197 198 /// We need to combine operations and report invalid operations (like 199 /// overlapping replaces that are not completed nested). Inserts to 200 /// same index need to be combined etc... Here are the cases: 201 /// 202 /// I.i.u I.j.v leave alone, nonoverlapping 203 /// I.i.u I.i.v combine: Iivu 204 /// 205 /// R.i-j.u R.x-y.v | i-j in x-y delete first R 206 /// R.i-j.u R.i-j.v delete first R 207 /// R.i-j.u R.x-y.v | x-y in i-j ERROR 208 /// R.i-j.u R.x-y.v | boundaries overlap ERROR 209 /// 210 /// Delete special case of replace (text==null): 211 /// D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 212 /// 213 /// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before 214 /// we're not deleting i) 215 /// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping 216 /// R.x-y.v I.i.u | i in x-y ERROR 217 /// R.x-y.v I.x.u R.x-y.uv (combine, delete I) 218 /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 219 /// 220 /// I.i.u = insert u before op @ index i 221 /// R.x-y.u = replace x-y indexed tokens with u 222 /// 223 /// First we need to examine replaces. For any replace op: 224 /// 225 /// 1. wipe out any insertions before op within that range. 226 /// 2. Drop any replace op before that is contained completely within 227 /// that range. 228 /// 3. Throw exception upon boundary overlap with any previous replace. 229 /// 230 /// Then we can deal with inserts: 231 /// 232 /// 1. for any inserts to same index, combine even if not adjacent. 233 /// 2. for any prior replace with same left boundary, combine this 234 /// insert with replace and delete this replace. 235 /// 3. throw exception if index in same range as previous replace 236 /// 237 /// Don't actually delete; make op null in list. Easier to walk list. 238 /// Later we can throw as we add to index → op map. 239 /// 240 /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 241 /// inserted stuff would be before the replace range. But, if you 242 /// add tokens in front of a method body '{' and then delete the method 243 /// body, I think the stuff before the '{' you added should disappear too. 244 /// 245 /// Return a map from token index to operation. 246 /// reduceToSingleOperationPerIndexnull247 final func reduceToSingleOperationPerIndex() throws -> [Int: RewriteOperation] { 248 249 let rewritesCount = rewrites.count 250 // WALK REPLACES 251 for i in 0..<rewritesCount { 252 guard let rop = rewrites[i] as? ReplaceOp else { 253 continue 254 } 255 256 // Wipe prior inserts within range 257 let inserts = getKindOfOps(&rewrites, InsertBeforeOp.self, i) 258 for j in inserts { 259 if let iop = rewrites[j] { 260 if iop.index == rop.index { 261 // E.g., insert before 2, delete 2..2; update replace 262 // text to include insert before, kill insert 263 rewrites[iop.instructionIndex] = nil 264 rop.text = catOpText(iop.text, rop.text) 265 } 266 else if iop.index > rop.index && iop.index <= rop.lastIndex { 267 // delete insert as it's a no-op. 268 rewrites[iop.instructionIndex] = nil 269 } 270 } 271 } 272 // Drop any prior replaces contained within 273 let prevRopIndexList = getKindOfOps(&rewrites, ReplaceOp.self, i) 274 for j in prevRopIndexList { 275 if let prevRop = rewrites[j] { 276 if prevRop.index >= rop.index && prevRop.lastIndex <= rop.lastIndex { 277 // delete replace as it's a no-op. 278 rewrites[prevRop.instructionIndex] = nil 279 continue 280 } 281 // throw exception unless disjoint or identical 282 let disjoint: Bool = 283 prevRop.lastIndex < rop.index || prevRop.index > rop.lastIndex 284 // Delete special case of replace (text==null): 285 // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 286 if prevRop.text == nil && rop.text == nil && !disjoint { 287 rewrites[prevRop.instructionIndex] = nil // kill first delete 288 rop.index = min(prevRop.index, rop.index) 289 rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex) 290 } else if !disjoint { 291 throw ANTLRError.illegalArgument(msg: "replace op boundaries of \(rop.description) " + 292 "overlap with previous \(prevRop.description)") 293 } 294 } 295 } 296 } 297 298 // WALK INSERTS 299 for i in 0..<rewritesCount { 300 guard let iop = rewrites[i] else { 301 continue 302 } 303 if !(iop is InsertBeforeOp) { 304 continue 305 } 306 307 // combine current insert with prior if any at same index 308 let prevIopIndexList = getKindOfOps(&rewrites, InsertBeforeOp.self, i) 309 for j in prevIopIndexList { 310 if let prevIop = rewrites[j] { 311 if prevIop.index == iop.index { 312 if prevIop is InsertAfterOp { 313 iop.text = catOpText(prevIop.text, iop.text) 314 rewrites[prevIop.instructionIndex] = nil 315 } 316 else if prevIop is InsertBeforeOp { 317 // convert to strings...we're in process of toString'ing 318 // whole token buffer so no lazy eval issue with any templates 319 iop.text = catOpText(iop.text, prevIop.text) 320 // delete redundant prior insert 321 rewrites[prevIop.instructionIndex] = nil 322 } 323 } 324 } 325 } 326 327 // look for replaces where iop.index is in range; error 328 let ropIndexList = getKindOfOps(&rewrites, ReplaceOp.self, i) 329 for j in ropIndexList { 330 if let rop = rewrites[j] { 331 if iop.index == rop.index { 332 rop.text = catOpText(iop.text, rop.text) 333 rewrites[i] = nil // delete current insert 334 continue 335 } 336 if iop.index >= rop.index && iop.index <= rop.lastIndex { 337 throw ANTLRError.illegalArgument(msg: "insert op \(iop.description) within" + 338 " boundaries of previous \(rop.description)") 339 340 } 341 } 342 } 343 } 344 345 var m = [Int: RewriteOperation]() 346 for i in 0..<rewritesCount { 347 if let op = rewrites[i] { 348 if m[op.index] != nil { 349 throw ANTLRError.illegalArgument(msg: "should only be one op per index") 350 } 351 m[op.index] = op 352 } 353 } 354 355 return m 356 } 357 catOpTextnull358 final func catOpText(_ a: String?, _ b: String?) -> String { 359 let x = a ?? "" 360 let y = b ?? "" 361 return x + y 362 } 363 364 /// Get all operations before an index of a particular kind 365 getKindOfOps<T: RewriteOperation>null366 final func getKindOfOps<T: RewriteOperation>(_ rewrites: inout [RewriteOperation?], _ kind: T.Type, _ before: Int ) -> [Int] { 367 368 let length = min(before, rewrites.count) 369 var op = [Int]() 370 op.reserveCapacity(length) 371 for i in 0..<length { 372 if rewrites[i] is T { 373 op.append(i) 374 } 375 } 376 return op 377 } 378 } 379 380 /// Our source stream 381 internal var tokens: TokenStream 382 383 /// You may have multiple, named streams of rewrite operations. 384 /// I'm calling these things "programs." 385 /// Maps String (name) → rewrite (List) 386 /// 387 internal var programs = [String: RewriteOperationArray]() 388 389 /// Map String (program name) → Integer index 390 internal final var lastRewriteTokenIndexes: [String: Int] 391 392 public init(_ tokens: TokenStream) { 393 self.tokens = tokens 394 programs[DEFAULT_PROGRAM_NAME] = RewriteOperationArray() 395 lastRewriteTokenIndexes = Dictionary<String, Int>() 396 } 397 getTokenStreamnull398 public final func getTokenStream() -> TokenStream { 399 return tokens 400 } 401 rollbacknull402 public func rollback(_ instructionIndex: Int) { 403 rollback(DEFAULT_PROGRAM_NAME, instructionIndex) 404 } 405 406 /// Rollback the instruction stream for a program so that 407 /// the indicated instruction (via instructionIndex) is no 408 /// longer in the stream. UNTESTED! 409 /// rollbacknull410 public func rollback(_ programName: String, _ instructionIndex: Int) { 411 if let program = programs[programName] { 412 program.rollback(instructionIndex) 413 } 414 } 415 deleteProgramnull416 public func deleteProgram() { 417 deleteProgram(DEFAULT_PROGRAM_NAME) 418 } 419 420 /// Reset the program so that no instructions exist deleteProgramnull421 public func deleteProgram(_ programName: String) { 422 rollback(programName, TokenStreamRewriter.MIN_TOKEN_INDEX) 423 } 424 insertAfternull425 public func insertAfter(_ t: Token, _ text: String) { 426 insertAfter(DEFAULT_PROGRAM_NAME, t, text) 427 } 428 insertAfternull429 public func insertAfter(_ index: Int, _ text: String) { 430 insertAfter(DEFAULT_PROGRAM_NAME, index, text) 431 } 432 insertAfternull433 public func insertAfter(_ programName: String, _ t: Token, _ text: String) { 434 insertAfter(programName, t.getTokenIndex(), text) 435 } 436 insertAfternull437 public func insertAfter(_ programName: String, _ index: Int, _ text: String) { 438 // to insert after, just insert before next index (even if past end) 439 let op = InsertAfterOp(index, text, tokens) 440 let rewrites = getProgram(programName) 441 rewrites.append(op) 442 } 443 insertBeforenull444 public func insertBefore(_ t: Token, _ text: String) { 445 insertBefore(DEFAULT_PROGRAM_NAME, t, text) 446 } 447 insertBeforenull448 public func insertBefore(_ index: Int, _ text: String) { 449 insertBefore(DEFAULT_PROGRAM_NAME, index, text) 450 } 451 insertBeforenull452 public func insertBefore(_ programName: String, _ t: Token, _ text: String) { 453 insertBefore(programName, t.getTokenIndex(), text) 454 } 455 insertBeforenull456 public func insertBefore(_ programName: String, _ index: Int, _ text: String) { 457 let op = InsertBeforeOp(index, text, tokens) 458 let rewrites = getProgram(programName) 459 rewrites.append(op) 460 } 461 replacenull462 public func replace(_ index: Int, _ text: String) throws { 463 try replace(DEFAULT_PROGRAM_NAME, index, index, text) 464 } 465 replacenull466 public func replace(_ from: Int, _ to: Int, _ text: String) throws { 467 try replace(DEFAULT_PROGRAM_NAME, from, to, text) 468 } 469 replacenull470 public func replace(_ indexT: Token, _ text: String) throws { 471 try replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text) 472 } 473 replacenull474 public func replace(_ from: Token, _ to: Token, _ text: String) throws { 475 try replace(DEFAULT_PROGRAM_NAME, from, to, text) 476 } 477 replacenull478 public func replace(_ programName: String, _ from: Int, _ to: Int, _ text: String?) throws { 479 if from > to || from < 0 || to < 0 || to >= tokens.size() { 480 throw ANTLRError.illegalArgument(msg: "replace: range invalid: \(from)..\(to)(size=\(tokens.size()))") 481 } 482 let op = ReplaceOp(from, to, text, tokens) 483 let rewritesArray = getProgram(programName) 484 rewritesArray.append(op) 485 } 486 replacenull487 public func replace(_ programName: String, _ from: Token, _ to: Token, _ text: String?) throws { 488 try replace(programName, 489 from.getTokenIndex(), 490 to.getTokenIndex(), 491 text) 492 } 493 deletenull494 public func delete(_ index: Int) throws { 495 try delete(DEFAULT_PROGRAM_NAME, index, index) 496 } 497 deletenull498 public func delete(_ from: Int, _ to: Int) throws { 499 try delete(DEFAULT_PROGRAM_NAME, from, to) 500 } 501 deletenull502 public func delete(_ indexT: Token) throws { 503 try delete(DEFAULT_PROGRAM_NAME, indexT, indexT) 504 } 505 deletenull506 public func delete(_ from: Token, _ to: Token) throws { 507 try delete(DEFAULT_PROGRAM_NAME, from, to) 508 } 509 deletenull510 public func delete(_ programName: String, _ from: Int, _ to: Int) throws { 511 try replace(programName, from, to, nil) 512 } 513 deletenull514 public func delete(_ programName: String, _ from: Token, _ to: Token) throws { 515 try replace(programName, from, to, nil) 516 } 517 getLastRewriteTokenIndexnull518 public func getLastRewriteTokenIndex() -> Int { 519 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME) 520 } 521 getLastRewriteTokenIndexnull522 internal func getLastRewriteTokenIndex(_ programName: String) -> Int { 523 return lastRewriteTokenIndexes[programName] ?? -1 524 } 525 setLastRewriteTokenIndexnull526 internal func setLastRewriteTokenIndex(_ programName: String, _ i: Int) { 527 lastRewriteTokenIndexes[programName] = i 528 } 529 getProgramnull530 internal func getProgram(_ name: String) -> RewriteOperationArray { 531 if let program = programs[name] { 532 return program 533 } 534 else { 535 return initializeProgram(name) 536 } 537 } 538 initializeProgramnull539 private func initializeProgram(_ name: String) -> RewriteOperationArray { 540 let program = RewriteOperationArray() 541 programs[name] = program 542 return program 543 } 544 545 /// Return the text from the original tokens altered per the 546 /// instructions given to this rewriter. 547 /// getTextnull548 public func getText() throws -> String { 549 return try getText(DEFAULT_PROGRAM_NAME, Interval.of(0, tokens.size() - 1)) 550 } 551 552 /// Return the text from the original tokens altered per the 553 /// instructions given to this rewriter in programName. 554 /// getTextnull555 public func getText(_ programName: String) throws -> String { 556 return try getText(programName, Interval.of(0, tokens.size() - 1)) 557 } 558 559 /// Return the text associated with the tokens in the interval from the 560 /// original token stream but with the alterations given to this rewriter. 561 /// The interval refers to the indexes in the original token stream. 562 /// We do not alter the token stream in any way, so the indexes 563 /// and intervals are still consistent. Includes any operations done 564 /// to the first and last token in the interval. So, if you did an 565 /// insertBefore on the first token, you would get that insertion. 566 /// The same is true if you do an insertAfter the stop token. 567 /// getTextnull568 public func getText(_ interval: Interval) throws -> String { 569 return try getText(DEFAULT_PROGRAM_NAME, interval) 570 } 571 getTextnull572 public func getText(_ programName: String, _ interval: Interval) throws -> String { 573 var start = interval.a 574 var stop = interval.b 575 576 // ensure start/end are in range 577 if stop > tokens.size() - 1 { 578 stop = tokens.size() - 1 579 } 580 if start < 0 { 581 start = 0 582 } 583 guard let rewrites = programs[programName], !rewrites.isEmpty else { 584 return try tokens.getText(interval) // no instructions to execute 585 } 586 587 var buf = "" 588 589 // First, optimize instruction stream 590 var indexToOp = try rewrites.reduceToSingleOperationPerIndex() 591 592 // Walk buffer, executing instructions and emitting tokens 593 var i = start 594 while i <= stop && i < tokens.size() { 595 let op = indexToOp[i] 596 indexToOp.removeValue(forKey: i) // remove so any left have index size-1 597 let t = try tokens.get(i) 598 if let op = op { 599 i = try op.execute(&buf) // execute operation and skip 600 } 601 else { 602 // no operation at that index, just dump token 603 if t.getType() != CommonToken.EOF { 604 buf.append(t.getText()!) 605 } 606 i += 1 // move to next token 607 } 608 } 609 610 // include stuff after end if it's last index in buffer 611 // So, if they did an insertAfter(lastValidIndex, "foo"), include 612 // foo if end==lastValidIndex. 613 if stop == tokens.size() - 1 { 614 // Scan any remaining operations after last token 615 // should be included (they will be inserts). 616 for op in indexToOp.values { 617 if op.index >= tokens.size() - 1 { 618 buf += op.text! 619 } 620 } 621 } 622 623 return buf 624 } 625 } 626