1 //===--- RewriteRope.h - Rope specialized for rewriter ----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file defines the RewriteRope class, which is a powerful string class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H
15 #define LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H
16 
17 #include "llvm/ADT/IntrusiveRefCntPtr.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Compiler.h"
20 #include <cassert>
21 #include <cstddef>
22 #include <cstring>
23 #include <iterator>
24 
25 namespace clang {
26   //===--------------------------------------------------------------------===//
27   // RopeRefCountString Class
28   //===--------------------------------------------------------------------===//
29 
30   /// RopeRefCountString - This struct is allocated with 'new char[]' from the
31   /// heap, and represents a reference counted chunk of string data.  When its
32   /// ref count drops to zero, it is delete[]'d.  This is primarily managed
33   /// through the RopePiece class below.
34   struct RopeRefCountString {
35     unsigned RefCount;
36     char Data[1];  //  Variable sized.
37 
RetainRopeRefCountString38     void Retain() { ++RefCount; }
39 
ReleaseRopeRefCountString40     void Release() {
41       assert(RefCount > 0 && "Reference count is already zero.");
42       if (--RefCount == 0)
43         delete [] (char*)this;
44     }
45   };
46 
47   //===--------------------------------------------------------------------===//
48   // RopePiece Class
49   //===--------------------------------------------------------------------===//
50 
51   /// RopePiece - This class represents a view into a RopeRefCountString object.
52   /// This allows references to string data to be efficiently chopped up and
53   /// moved around without having to push around the string data itself.
54   ///
55   /// For example, we could have a 1M RopePiece and want to insert something
56   /// into the middle of it.  To do this, we split it into two RopePiece objects
57   /// that both refer to the same underlying RopeRefCountString (just with
58   /// different offsets) which is a nice constant time operation.
59   struct RopePiece {
60     llvm::IntrusiveRefCntPtr<RopeRefCountString> StrData;
61     unsigned StartOffs;
62     unsigned EndOffs;
63 
RopePieceRopePiece64     RopePiece() : StrData(nullptr), StartOffs(0), EndOffs(0) {}
65 
RopePieceRopePiece66     RopePiece(llvm::IntrusiveRefCntPtr<RopeRefCountString> Str, unsigned Start,
67               unsigned End)
68         : StrData(std::move(Str)), StartOffs(Start), EndOffs(End) {}
69 
70     const char &operator[](unsigned Offset) const {
71       return StrData->Data[Offset+StartOffs];
72     }
73     char &operator[](unsigned Offset) {
74       return StrData->Data[Offset+StartOffs];
75     }
76 
sizeRopePiece77     unsigned size() const { return EndOffs-StartOffs; }
78   };
79 
80   //===--------------------------------------------------------------------===//
81   // RopePieceBTreeIterator Class
82   //===--------------------------------------------------------------------===//
83 
84   /// RopePieceBTreeIterator - This class provides read-only forward iteration
85   /// over bytes that are in a RopePieceBTree.  This first iterates over bytes
86   /// in a RopePiece, then iterates over RopePiece's in a RopePieceBTreeLeaf,
87   /// then iterates over RopePieceBTreeLeaf's in a RopePieceBTree.
88   class RopePieceBTreeIterator :
89       public std::iterator<std::forward_iterator_tag, const char, ptrdiff_t> {
90     /// CurNode - The current B+Tree node that we are inspecting.
91     const void /*RopePieceBTreeLeaf*/ *CurNode;
92     /// CurPiece - The current RopePiece in the B+Tree node that we're
93     /// inspecting.
94     const RopePiece *CurPiece;
95     /// CurChar - The current byte in the RopePiece we are pointing to.
96     unsigned CurChar;
97   public:
98     // begin iterator.
99     RopePieceBTreeIterator(const void /*RopePieceBTreeNode*/ *N);
100     // end iterator
RopePieceBTreeIterator()101     RopePieceBTreeIterator()
102       : CurNode(nullptr), CurPiece(nullptr), CurChar(0) {}
103 
104     char operator*() const {
105       return (*CurPiece)[CurChar];
106     }
107 
108     bool operator==(const RopePieceBTreeIterator &RHS) const {
109       return CurPiece == RHS.CurPiece && CurChar == RHS.CurChar;
110     }
111     bool operator!=(const RopePieceBTreeIterator &RHS) const {
112       return !operator==(RHS);
113     }
114 
115     RopePieceBTreeIterator& operator++() {   // Preincrement
116       if (CurChar+1 < CurPiece->size())
117         ++CurChar;
118       else
119         MoveToNextPiece();
120       return *this;
121     }
122     inline RopePieceBTreeIterator operator++(int) { // Postincrement
123       RopePieceBTreeIterator tmp = *this; ++*this; return tmp;
124     }
125 
piece()126     llvm::StringRef piece() const {
127       return llvm::StringRef(&(*CurPiece)[0], CurPiece->size());
128     }
129 
130     void MoveToNextPiece();
131   };
132 
133   //===--------------------------------------------------------------------===//
134   // RopePieceBTree Class
135   //===--------------------------------------------------------------------===//
136 
137   class RopePieceBTree {
138     void /*RopePieceBTreeNode*/ *Root;
139     void operator=(const RopePieceBTree &) LLVM_DELETED_FUNCTION;
140   public:
141     RopePieceBTree();
142     RopePieceBTree(const RopePieceBTree &RHS);
143     ~RopePieceBTree();
144 
145     typedef RopePieceBTreeIterator iterator;
begin()146     iterator begin() const { return iterator(Root); }
end()147     iterator end() const { return iterator(); }
148     unsigned size() const;
empty()149     unsigned empty() const { return size() == 0; }
150 
151     void clear();
152 
153     void insert(unsigned Offset, const RopePiece &R);
154 
155     void erase(unsigned Offset, unsigned NumBytes);
156   };
157 
158   //===--------------------------------------------------------------------===//
159   // RewriteRope Class
160   //===--------------------------------------------------------------------===//
161 
162 /// RewriteRope - A powerful string class.  This class supports extremely
163 /// efficient insertions and deletions into the middle of it, even for
164 /// ridiculously long strings.
165 class RewriteRope {
166   RopePieceBTree Chunks;
167 
168   /// We allocate space for string data out of a buffer of size AllocChunkSize.
169   /// This keeps track of how much space is left.
170   llvm::IntrusiveRefCntPtr<RopeRefCountString> AllocBuffer;
171   unsigned AllocOffs;
172   enum { AllocChunkSize = 4080 };
173 
174 public:
RewriteRope()175   RewriteRope() :  AllocBuffer(nullptr), AllocOffs(AllocChunkSize) {}
RewriteRope(const RewriteRope & RHS)176   RewriteRope(const RewriteRope &RHS)
177     : Chunks(RHS.Chunks), AllocBuffer(nullptr), AllocOffs(AllocChunkSize) {
178   }
179 
180   typedef RopePieceBTree::iterator iterator;
181   typedef RopePieceBTree::iterator const_iterator;
begin()182   iterator begin() const { return Chunks.begin(); }
end()183   iterator end() const  { return Chunks.end(); }
size()184   unsigned size() const { return Chunks.size(); }
185 
clear()186   void clear() {
187     Chunks.clear();
188   }
189 
assign(const char * Start,const char * End)190   void assign(const char *Start, const char *End) {
191     clear();
192     if (Start != End)
193       Chunks.insert(0, MakeRopeString(Start, End));
194   }
195 
insert(unsigned Offset,const char * Start,const char * End)196   void insert(unsigned Offset, const char *Start, const char *End) {
197     assert(Offset <= size() && "Invalid position to insert!");
198     if (Start == End) return;
199     Chunks.insert(Offset, MakeRopeString(Start, End));
200   }
201 
erase(unsigned Offset,unsigned NumBytes)202   void erase(unsigned Offset, unsigned NumBytes) {
203     assert(Offset+NumBytes <= size() && "Invalid region to erase!");
204     if (NumBytes == 0) return;
205     Chunks.erase(Offset, NumBytes);
206   }
207 
208 private:
209   RopePiece MakeRopeString(const char *Start, const char *End);
210 };
211 
212 } // end namespace clang
213 
214 #endif
215