1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2  * Use of this file is governed by the BSD 3-clause license that
3  * can be found in the LICENSE.txt file in the project root.
4  */
5 
6 
7 ///
8 /// Useful for rewriting out a buffered input token stream after doing some
9 /// augmentation or other manipulations on it.
10 ///
11 ///
12 /// You can insert stuff, replace, and delete chunks. Note that the operations
13 /// are done lazily--only if you convert the buffer to a _String_ with
14 /// _org.antlr.v4.runtime.TokenStream#getText()_. This is very efficient because you are not
15 /// moving data around all the time. As the buffer of tokens is converted to
16 /// strings, the _#getText()_ method(s) scan the input token stream and
17 /// check to see if there is an operation at the current index. If so, the
18 /// operation is done and then normal _String_ rendering continues on the
19 /// buffer. This is like having multiple Turing machine instruction streams
20 /// (programs) operating on a single input tape. :)
21 ///
22 ///
23 /// This rewriter makes no modifications to the token stream. It does not ask the
24 /// stream to fill itself up nor does it advance the input cursor. The token
25 /// stream _org.antlr.v4.runtime.TokenStream#index()_ will return the same value before and
26 /// after any _#getText()_ call.
27 ///
28 ///
29 /// The rewriter only works on tokens that you have in the buffer and ignores the
30 /// current input cursor. If you are buffering tokens on-demand, calling
31 /// _#getText()_ halfway through the input will only do rewrites for those
32 /// tokens in the first half of the file.
33 ///
34 ///
35 /// Since the operations are done lazily at _#getText_-time, operations do
36 /// not screw up the token index values. That is, an insert operation at token
37 /// index `i` does not change the index values for tokens
38 /// `i`+1..n-1.
39 ///
40 ///
41 /// Because operations never actually alter the buffer, you may always get the
42 /// original token stream back without undoing anything. Since the instructions
43 /// are queued up, you can easily simulate transactions and roll back any changes
44 /// if there is an error just by removing instructions. For example,
45 ///
46 ///
47 /// CharStream input = new ANTLRFileStream("input");
48 /// TLexer lex = new TLexer(input);
49 /// CommonTokenStream tokens = new CommonTokenStream(lex);
50 /// T parser = new T(tokens);
51 /// TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
52 /// parser.startRule();
53 ///
54 ///
55 ///
56 /// Then in the rules, you can execute (assuming rewriter is visible):
57 ///
58 ///
59 /// Token t,u;
60 /// ...
61 /// rewriter.insertAfter(t, "text to put after t");}
62 /// rewriter.insertAfter(u, "text after u");}
63 /// System.out.println(rewriter.getText());
64 ///
65 ///
66 ///
67 /// You can also have multiple "instruction streams" and get multiple rewrites
68 /// from a single pass over the input. Just name the instruction streams and use
69 /// that name again when printing the buffer. This could be useful for generating
70 /// a C file and also its header file--all from the same buffer:
71 ///
72 ///
73 /// rewriter.insertAfter("pass1", t, "text to put after t");}
74 /// rewriter.insertAfter("pass2", u, "text after u");}
75 /// System.out.println(rewriter.getText("pass1"));
76 /// System.out.println(rewriter.getText("pass2"));
77 ///
78 ///
79 ///
80 /// If you don't use named rewrite streams, a "default" stream is used as the
81 /// first example shows.
82 ///
83 
84 import Foundation
85 
86 public class TokenStreamRewriter {
87     public let DEFAULT_PROGRAM_NAME = "default"
88     public static let PROGRAM_INIT_SIZE = 100
89     public static let MIN_TOKEN_INDEX = 0
90 
91     // Define the rewrite operation hierarchy
92     public class RewriteOperation: CustomStringConvertible {
93         /// What index into rewrites List are we?
94         internal var instructionIndex = 0
95         /// Token buffer index.
96         internal var index: Int
97         internal var text: String?
98         internal var lastIndex = 0
99         internal weak var tokens: TokenStream!
100 
101         init(_ index: Int, _ tokens: TokenStream) {
102             self.index = index
103             self.tokens = tokens
104         }
105 
106         init(_ index: Int, _ text: String?, _ tokens: TokenStream) {
107             self.index = index
108             self.text = text
109             self.tokens = tokens
110         }
111 
112         /// Execute the rewrite operation by possibly adding to the buffer.
113         /// Return the index of the next token to operate on.
114         ///
executenull115         public func execute(_ buf: inout String) throws -> Int {
116             return index
117         }
118 
119         public var description: String {
120             let opName = String(describing: type(of: self))
121             return "<\(opName)@\(try! tokens.get(index)):\"\(text!)\">"
122         }
123     }
124 
125     public class InsertBeforeOp: RewriteOperation {
executenull126         override public func execute(_ buf: inout String) throws -> Int {
127             if let text = text {
128                 buf.append(text)
129             }
130             let token = try tokens.get(index)
131             if token.getType() != CommonToken.EOF {
132                 buf.append(token.getText()!)
133             }
134             return index + 1
135         }
136     }
137 
138     public class InsertAfterOp: InsertBeforeOp {
139         public override init(_ index: Int, _ text: String?, _ tokens: TokenStream) {
140             super.init(index + 1, text, tokens)
141         }
142     }
143 
144     /// I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
145     /// instructions.
146     ///
147 
148     public class ReplaceOp: RewriteOperation {
149 
150         public init(_ from: Int, _ to: Int, _ text: String?, _ tokens: TokenStream) {
151             super.init(from, text, tokens)
152             lastIndex = to
153         }
154 
155         override
executenull156         public func execute(_ buf: inout String) -> Int {
157             if let text = text {
158                 buf += text
159             }
160             return lastIndex + 1
161         }
162 
163         override
164         public var description: String {
165             let token = try! tokens.get(index)
166             let lastToken = try! tokens.get(lastIndex)
167             if let text = text {
168                 return "<ReplaceOp@\(token)..\(lastToken):\"\(text)\">"
169             }
170             return "<DeleteOp@\(token)..\(lastToken)>"
171         }
172     }
173 
174     public class RewriteOperationArray{
175         private final var rewrites = [RewriteOperation?]()
176 
177         public init() {
178             rewrites.reserveCapacity(TokenStreamRewriter.PROGRAM_INIT_SIZE)
179         }
180 
appendnull181         final func append(_ op: RewriteOperation) {
182             op.instructionIndex = rewrites.count
183             rewrites.append(op)
184         }
185 
rollbacknull186         final func rollback(_ instructionIndex: Int) {
187             rewrites = Array(rewrites[TokenStreamRewriter.MIN_TOKEN_INDEX ..< instructionIndex])
188         }
189 
190         final var count: Int {
191             return rewrites.count
192         }
193 
194         final var isEmpty: Bool {
195             return rewrites.isEmpty
196         }
197 
198         /// We need to combine operations and report invalid operations (like
199         /// overlapping replaces that are not completed nested). Inserts to
200         /// same index need to be combined etc...  Here are the cases:
201         ///
202         /// I.i.u I.j.v                             leave alone, nonoverlapping
203         /// I.i.u I.i.v                             combine: Iivu
204         ///
205         /// R.i-j.u R.x-y.v | i-j in x-y            delete first R
206         /// R.i-j.u R.i-j.v                         delete first R
207         /// R.i-j.u R.x-y.v | x-y in i-j            ERROR
208         /// R.i-j.u R.x-y.v | boundaries overlap    ERROR
209         ///
210         /// Delete special case of replace (text==null):
211         /// D.i-j.u D.x-y.v | boundaries overlap    combine to max(min)..max(right)
212         ///
213         /// I.i.u R.x-y.v | i in (x+1)-y            delete I (since insert before
214         /// we're not deleting i)
215         /// I.i.u R.x-y.v | i not in (x+1)-y        leave alone, nonoverlapping
216         /// R.x-y.v I.i.u | i in x-y                ERROR
217         /// R.x-y.v I.x.u                           R.x-y.uv (combine, delete I)
218         /// R.x-y.v I.i.u | i not in x-y            leave alone, nonoverlapping
219         ///
220         /// I.i.u = insert u before op @ index i
221         /// R.x-y.u = replace x-y indexed tokens with u
222         ///
223         /// First we need to examine replaces. For any replace op:
224         ///
225         /// 1. wipe out any insertions before op within that range.
226         /// 2. Drop any replace op before that is contained completely within
227         /// that range.
228         /// 3. Throw exception upon boundary overlap with any previous replace.
229         ///
230         /// Then we can deal with inserts:
231         ///
232         /// 1. for any inserts to same index, combine even if not adjacent.
233         /// 2. for any prior replace with same left boundary, combine this
234         /// insert with replace and delete this replace.
235         /// 3. throw exception if index in same range as previous replace
236         ///
237         /// Don't actually delete; make op null in list. Easier to walk list.
238         /// Later we can throw as we add to index &rarr; op map.
239         ///
240         /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
241         /// inserted stuff would be before the replace range. But, if you
242         /// add tokens in front of a method body '{' and then delete the method
243         /// body, I think the stuff before the '{' you added should disappear too.
244         ///
245         /// Return a map from token index to operation.
246         ///
reduceToSingleOperationPerIndexnull247         final func reduceToSingleOperationPerIndex() throws -> [Int: RewriteOperation] {
248 
249             let rewritesCount = rewrites.count
250             // WALK REPLACES
251             for i in 0..<rewritesCount {
252                 guard let rop = rewrites[i] as? ReplaceOp else {
253                     continue
254                 }
255 
256                 // Wipe prior inserts within range
257                 let inserts = getKindOfOps(&rewrites, InsertBeforeOp.self, i)
258                 for j in inserts {
259                     if let iop = rewrites[j] {
260                         if iop.index == rop.index {
261                             // E.g., insert before 2, delete 2..2; update replace
262                             // text to include insert before, kill insert
263                             rewrites[iop.instructionIndex] = nil
264                             rop.text = catOpText(iop.text, rop.text)
265                         }
266                         else if iop.index > rop.index && iop.index <= rop.lastIndex {
267                             // delete insert as it's a no-op.
268                             rewrites[iop.instructionIndex] = nil
269                         }
270                     }
271                 }
272                 // Drop any prior replaces contained within
273                 let prevRopIndexList = getKindOfOps(&rewrites, ReplaceOp.self, i)
274                 for j in prevRopIndexList {
275                     if let prevRop = rewrites[j] {
276                         if prevRop.index >= rop.index && prevRop.lastIndex <= rop.lastIndex {
277                             // delete replace as it's a no-op.
278                             rewrites[prevRop.instructionIndex] = nil
279                             continue
280                         }
281                         // throw exception unless disjoint or identical
282                         let disjoint: Bool =
283                             prevRop.lastIndex < rop.index || prevRop.index > rop.lastIndex
284                         // Delete special case of replace (text==null):
285                         // D.i-j.u D.x-y.v  | boundaries overlap    combine to max(min)..max(right)
286                         if prevRop.text == nil && rop.text == nil && !disjoint {
287                             rewrites[prevRop.instructionIndex] = nil // kill first delete
288                             rop.index = min(prevRop.index, rop.index)
289                             rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex)
290                         } else if !disjoint {
291                             throw ANTLRError.illegalArgument(msg: "replace op boundaries of \(rop.description) " +
292                                 "overlap with previous \(prevRop.description)")
293                         }
294                     }
295                 }
296             }
297 
298             // WALK INSERTS
299             for i in 0..<rewritesCount {
300                 guard let iop = rewrites[i] else {
301                     continue
302                 }
303                 if !(iop is InsertBeforeOp) {
304                     continue
305                 }
306 
307                 // combine current insert with prior if any at same index
308                 let prevIopIndexList = getKindOfOps(&rewrites, InsertBeforeOp.self, i)
309                 for j in prevIopIndexList {
310                     if let prevIop = rewrites[j] {
311                         if prevIop.index == iop.index {
312                             if prevIop is InsertAfterOp {
313                                 iop.text = catOpText(prevIop.text, iop.text)
314                                 rewrites[prevIop.instructionIndex] = nil
315                             }
316                             else if prevIop is InsertBeforeOp {
317                                 // convert to strings...we're in process of toString'ing
318                                 // whole token buffer so no lazy eval issue with any templates
319                                 iop.text = catOpText(iop.text, prevIop.text)
320                                 // delete redundant prior insert
321                                 rewrites[prevIop.instructionIndex] = nil
322                             }
323                         }
324                     }
325                 }
326 
327                 // look for replaces where iop.index is in range; error
328                 let ropIndexList = getKindOfOps(&rewrites, ReplaceOp.self, i)
329                 for j in ropIndexList  {
330                     if let rop = rewrites[j] {
331                         if iop.index == rop.index {
332                             rop.text = catOpText(iop.text, rop.text)
333                             rewrites[i] = nil    // delete current insert
334                             continue
335                         }
336                         if iop.index >= rop.index && iop.index <= rop.lastIndex {
337                             throw ANTLRError.illegalArgument(msg: "insert op \(iop.description) within" +
338                                 " boundaries of previous \(rop.description)")
339 
340                         }
341                     }
342                 }
343             }
344 
345             var m = [Int: RewriteOperation]()
346             for i in 0..<rewritesCount {
347                 if let op = rewrites[i] {
348                     if m[op.index] != nil {
349                         throw ANTLRError.illegalArgument(msg: "should only be one op per index")
350                     }
351                     m[op.index] = op
352                 }
353             }
354 
355             return m
356         }
357 
catOpTextnull358         final func catOpText(_ a: String?, _ b: String?) -> String {
359             let x = a ?? ""
360             let y = b ?? ""
361             return x + y
362         }
363 
364         /// Get all operations before an index of a particular kind
365 
getKindOfOps<T: RewriteOperation>null366         final func getKindOfOps<T: RewriteOperation>(_ rewrites: inout [RewriteOperation?], _ kind: T.Type, _ before: Int ) -> [Int] {
367 
368             let length = min(before, rewrites.count)
369             var op = [Int]()
370             op.reserveCapacity(length)
371             for i in 0..<length {
372                 if rewrites[i] is T {
373                     op.append(i)
374                 }
375             }
376             return op
377         }
378     }
379 
380     /// Our source stream
381     internal var tokens: TokenStream
382 
383     /// You may have multiple, named streams of rewrite operations.
384     /// I'm calling these things "programs."
385     /// Maps String (name) &rarr; rewrite (List)
386     ///
387     internal var programs = [String: RewriteOperationArray]()
388 
389     /// Map String (program name) &rarr; Integer index
390     internal final var lastRewriteTokenIndexes: [String: Int]
391 
392     public init(_ tokens: TokenStream) {
393         self.tokens = tokens
394         programs[DEFAULT_PROGRAM_NAME] = RewriteOperationArray()
395         lastRewriteTokenIndexes = Dictionary<String, Int>()
396     }
397 
getTokenStreamnull398     public final func getTokenStream() -> TokenStream {
399         return tokens
400     }
401 
rollbacknull402     public func rollback(_ instructionIndex: Int) {
403         rollback(DEFAULT_PROGRAM_NAME, instructionIndex)
404     }
405 
406     /// Rollback the instruction stream for a program so that
407     /// the indicated instruction (via instructionIndex) is no
408     /// longer in the stream. UNTESTED!
409     ///
rollbacknull410     public func rollback(_ programName: String, _ instructionIndex: Int) {
411         if let program = programs[programName] {
412             program.rollback(instructionIndex)
413         }
414     }
415 
deleteProgramnull416     public func deleteProgram() {
417         deleteProgram(DEFAULT_PROGRAM_NAME)
418     }
419 
420     /// Reset the program so that no instructions exist
deleteProgramnull421     public func deleteProgram(_ programName: String) {
422         rollback(programName, TokenStreamRewriter.MIN_TOKEN_INDEX)
423     }
424 
insertAfternull425     public func insertAfter(_ t: Token, _ text: String) {
426         insertAfter(DEFAULT_PROGRAM_NAME, t, text)
427     }
428 
insertAfternull429     public func insertAfter(_ index: Int, _ text: String) {
430         insertAfter(DEFAULT_PROGRAM_NAME, index, text)
431     }
432 
insertAfternull433     public func insertAfter(_ programName: String, _ t: Token, _ text: String) {
434         insertAfter(programName, t.getTokenIndex(), text)
435     }
436 
insertAfternull437     public func insertAfter(_ programName: String, _ index: Int, _ text: String) {
438         // to insert after, just insert before next index (even if past end)
439         let op = InsertAfterOp(index, text, tokens)
440         let rewrites = getProgram(programName)
441         rewrites.append(op)
442     }
443 
insertBeforenull444     public func insertBefore(_ t: Token, _ text: String) {
445         insertBefore(DEFAULT_PROGRAM_NAME, t, text)
446     }
447 
insertBeforenull448     public func insertBefore(_ index: Int, _ text: String) {
449         insertBefore(DEFAULT_PROGRAM_NAME, index, text)
450     }
451 
insertBeforenull452     public func insertBefore(_ programName: String, _ t: Token, _ text: String) {
453         insertBefore(programName, t.getTokenIndex(), text)
454     }
455 
insertBeforenull456     public func insertBefore(_ programName: String, _ index: Int, _ text: String) {
457         let op = InsertBeforeOp(index, text, tokens)
458         let rewrites = getProgram(programName)
459         rewrites.append(op)
460     }
461 
replacenull462     public func replace(_ index: Int, _ text: String) throws {
463         try replace(DEFAULT_PROGRAM_NAME, index, index, text)
464     }
465 
replacenull466     public func replace(_ from: Int, _ to: Int, _ text: String) throws {
467         try replace(DEFAULT_PROGRAM_NAME, from, to, text)
468     }
469 
replacenull470     public func replace(_ indexT: Token, _ text: String) throws {
471         try replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text)
472     }
473 
replacenull474     public func replace(_ from: Token, _ to: Token, _ text: String) throws {
475         try  replace(DEFAULT_PROGRAM_NAME, from, to, text)
476     }
477 
replacenull478     public func replace(_ programName: String, _ from: Int, _ to: Int, _ text: String?) throws {
479         if from > to || from < 0 || to < 0 || to >= tokens.size() {
480             throw ANTLRError.illegalArgument(msg: "replace: range invalid: \(from)..\(to)(size=\(tokens.size()))")
481         }
482         let op = ReplaceOp(from, to, text, tokens)
483         let rewritesArray = getProgram(programName)
484         rewritesArray.append(op)
485     }
486 
replacenull487     public func replace(_ programName: String, _ from: Token, _ to: Token, _ text: String?) throws {
488         try replace(programName,
489             from.getTokenIndex(),
490             to.getTokenIndex(),
491             text)
492     }
493 
deletenull494     public func delete(_ index: Int) throws {
495         try delete(DEFAULT_PROGRAM_NAME, index, index)
496     }
497 
deletenull498     public func delete(_ from: Int, _ to: Int) throws {
499         try delete(DEFAULT_PROGRAM_NAME, from, to)
500     }
501 
deletenull502     public func delete(_ indexT: Token) throws {
503         try delete(DEFAULT_PROGRAM_NAME, indexT, indexT)
504     }
505 
deletenull506     public func delete(_ from: Token, _ to: Token) throws {
507         try delete(DEFAULT_PROGRAM_NAME, from, to)
508     }
509 
deletenull510     public func delete(_ programName: String, _ from: Int, _ to: Int) throws {
511         try replace(programName, from, to, nil)
512     }
513 
deletenull514     public func delete(_ programName: String, _ from: Token, _ to: Token) throws {
515         try replace(programName, from, to, nil)
516     }
517 
getLastRewriteTokenIndexnull518     public func getLastRewriteTokenIndex() -> Int {
519         return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME)
520     }
521 
getLastRewriteTokenIndexnull522     internal func getLastRewriteTokenIndex(_ programName: String) -> Int {
523         return lastRewriteTokenIndexes[programName] ?? -1
524     }
525 
setLastRewriteTokenIndexnull526     internal func setLastRewriteTokenIndex(_ programName: String, _ i: Int) {
527         lastRewriteTokenIndexes[programName] = i
528     }
529 
getProgramnull530     internal func getProgram(_ name: String) -> RewriteOperationArray {
531         if let program = programs[name] {
532             return program
533         }
534         else {
535             return initializeProgram(name)
536         }
537     }
538 
initializeProgramnull539     private func initializeProgram(_ name: String) -> RewriteOperationArray {
540         let program = RewriteOperationArray()
541         programs[name] = program
542         return program
543     }
544 
545     /// Return the text from the original tokens altered per the
546     /// instructions given to this rewriter.
547     ///
getTextnull548     public func getText() throws -> String {
549         return try getText(DEFAULT_PROGRAM_NAME, Interval.of(0, tokens.size() - 1))
550     }
551 
552     /// Return the text from the original tokens altered per the
553     /// instructions given to this rewriter in programName.
554     ///
getTextnull555     public func getText(_ programName: String) throws -> String {
556         return try getText(programName, Interval.of(0, tokens.size() - 1))
557     }
558 
559     /// Return the text associated with the tokens in the interval from the
560     /// original token stream but with the alterations given to this rewriter.
561     /// The interval refers to the indexes in the original token stream.
562     /// We do not alter the token stream in any way, so the indexes
563     /// and intervals are still consistent. Includes any operations done
564     /// to the first and last token in the interval. So, if you did an
565     /// insertBefore on the first token, you would get that insertion.
566     /// The same is true if you do an insertAfter the stop token.
567     ///
getTextnull568     public func getText(_ interval: Interval) throws -> String {
569         return try getText(DEFAULT_PROGRAM_NAME, interval)
570     }
571 
getTextnull572     public func getText(_ programName: String, _ interval: Interval) throws -> String {
573         var start = interval.a
574         var stop = interval.b
575 
576         // ensure start/end are in range
577         if stop > tokens.size() - 1 {
578             stop = tokens.size() - 1
579         }
580         if start < 0 {
581             start = 0
582         }
583         guard let rewrites = programs[programName], !rewrites.isEmpty else {
584              return try tokens.getText(interval) // no instructions to execute
585         }
586 
587         var buf = ""
588 
589         // First, optimize instruction stream
590         var indexToOp = try rewrites.reduceToSingleOperationPerIndex()
591 
592         // Walk buffer, executing instructions and emitting tokens
593         var i = start
594         while i <= stop && i < tokens.size() {
595             let op = indexToOp[i]
596             indexToOp.removeValue(forKey: i)  // remove so any left have index size-1
597             let t = try tokens.get(i)
598             if let op = op {
599                 i = try op.execute(&buf) // execute operation and skip
600             }
601             else {
602                 // no operation at that index, just dump token
603                 if t.getType() != CommonToken.EOF {
604                     buf.append(t.getText()!)
605                 }
606                 i += 1 // move to next token
607             }
608         }
609 
610         // include stuff after end if it's last index in buffer
611         // So, if they did an insertAfter(lastValidIndex, "foo"), include
612         // foo if end==lastValidIndex.
613         if stop == tokens.size() - 1 {
614             // Scan any remaining operations after last token
615             // should be included (they will be inserts).
616             for op in indexToOp.values {
617                 if op.index >= tokens.size() - 1 {
618                     buf += op.text!
619                 }
620             }
621         }
622 
623         return buf
624     }
625 }
626