1 //===- RewriteRope.h - Rope specialized for rewriter ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the RewriteRope class, which is a powerful string class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H
14 #define LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H
15 
16 #include "llvm/ADT/IntrusiveRefCntPtr.h"
17 #include "llvm/ADT/StringRef.h"
18 #include <cassert>
19 #include <cstddef>
20 #include <iterator>
21 #include <utility>
22 
23 namespace clang {
24 
25   //===--------------------------------------------------------------------===//
26   // RopeRefCountString Class
27   //===--------------------------------------------------------------------===//
28 
29   /// RopeRefCountString - This struct is allocated with 'new char[]' from the
30   /// heap, and represents a reference counted chunk of string data.  When its
31   /// ref count drops to zero, it is delete[]'d.  This is primarily managed
32   /// through the RopePiece class below.
33   struct RopeRefCountString {
34     unsigned RefCount;
35     char Data[1];  //  Variable sized.
36 
37     void Retain() { ++RefCount; }
38 
39     void Release() {
40       assert(RefCount > 0 && "Reference count is already zero.");
41       if (--RefCount == 0)
42         delete [] (char*)this;
43     }
44   };
45 
46   //===--------------------------------------------------------------------===//
47   // RopePiece Class
48   //===--------------------------------------------------------------------===//
49 
50   /// RopePiece - This class represents a view into a RopeRefCountString object.
51   /// This allows references to string data to be efficiently chopped up and
52   /// moved around without having to push around the string data itself.
53   ///
54   /// For example, we could have a 1M RopePiece and want to insert something
55   /// into the middle of it.  To do this, we split it into two RopePiece objects
56   /// that both refer to the same underlying RopeRefCountString (just with
57   /// different offsets) which is a nice constant time operation.
58   struct RopePiece {
59     llvm::IntrusiveRefCntPtr<RopeRefCountString> StrData;
60     unsigned StartOffs = 0;
61     unsigned EndOffs = 0;
62 
63     RopePiece() = default;
64     RopePiece(llvm::IntrusiveRefCntPtr<RopeRefCountString> Str, unsigned Start,
65               unsigned End)
66         : StrData(std::move(Str)), StartOffs(Start), EndOffs(End) {}
67 
68     const char &operator[](unsigned Offset) const {
69       return StrData->Data[Offset+StartOffs];
70     }
71     char &operator[](unsigned Offset) {
72       return StrData->Data[Offset+StartOffs];
73     }
74 
75     unsigned size() const { return EndOffs-StartOffs; }
76   };
77 
78   //===--------------------------------------------------------------------===//
79   // RopePieceBTreeIterator Class
80   //===--------------------------------------------------------------------===//
81 
82   /// RopePieceBTreeIterator - This class provides read-only forward iteration
83   /// over bytes that are in a RopePieceBTree.  This first iterates over bytes
84   /// in a RopePiece, then iterates over RopePiece's in a RopePieceBTreeLeaf,
85   /// then iterates over RopePieceBTreeLeaf's in a RopePieceBTree.
86   class RopePieceBTreeIterator {
87     /// CurNode - The current B+Tree node that we are inspecting.
88     const void /*RopePieceBTreeLeaf*/ *CurNode = nullptr;
89 
90     /// CurPiece - The current RopePiece in the B+Tree node that we're
91     /// inspecting.
92     const RopePiece *CurPiece = nullptr;
93 
94     /// CurChar - The current byte in the RopePiece we are pointing to.
95     unsigned CurChar = 0;
96 
97   public:
98     using iterator_category = std::forward_iterator_tag;
99     using value_type = const char;
100     using difference_type = std::ptrdiff_t;
101     using pointer = value_type *;
102     using reference = value_type &;
103 
104     RopePieceBTreeIterator() = default;
105     RopePieceBTreeIterator(const void /*RopePieceBTreeNode*/ *N);
106 
107     char operator*() const {
108       return (*CurPiece)[CurChar];
109     }
110 
111     bool operator==(const RopePieceBTreeIterator &RHS) const {
112       return CurPiece == RHS.CurPiece && CurChar == RHS.CurChar;
113     }
114     bool operator!=(const RopePieceBTreeIterator &RHS) const {
115       return !operator==(RHS);
116     }
117 
118     RopePieceBTreeIterator& operator++() {   // Preincrement
119       if (CurChar+1 < CurPiece->size())
120         ++CurChar;
121       else
122         MoveToNextPiece();
123       return *this;
124     }
125 
126     RopePieceBTreeIterator operator++(int) { // Postincrement
127       RopePieceBTreeIterator tmp = *this; ++*this; return tmp;
128     }
129 
130     llvm::StringRef piece() const {
131       return llvm::StringRef(&(*CurPiece)[0], CurPiece->size());
132     }
133 
134     void MoveToNextPiece();
135   };
136 
137   //===--------------------------------------------------------------------===//
138   // RopePieceBTree Class
139   //===--------------------------------------------------------------------===//
140 
141   class RopePieceBTree {
142     void /*RopePieceBTreeNode*/ *Root;
143 
144   public:
145     RopePieceBTree();
146     RopePieceBTree(const RopePieceBTree &RHS);
147     RopePieceBTree &operator=(const RopePieceBTree &) = delete;
148     ~RopePieceBTree();
149 
150     using iterator = RopePieceBTreeIterator;
151 
152     iterator begin() const { return iterator(Root); }
153     iterator end() const { return iterator(); }
154     unsigned size() const;
155     unsigned empty() const { return size() == 0; }
156 
157     void clear();
158 
159     void insert(unsigned Offset, const RopePiece &R);
160 
161     void erase(unsigned Offset, unsigned NumBytes);
162   };
163 
164   //===--------------------------------------------------------------------===//
165   // RewriteRope Class
166   //===--------------------------------------------------------------------===//
167 
168 /// RewriteRope - A powerful string class.  This class supports extremely
169 /// efficient insertions and deletions into the middle of it, even for
170 /// ridiculously long strings.
171 class RewriteRope {
172   RopePieceBTree Chunks;
173 
174   /// We allocate space for string data out of a buffer of size AllocChunkSize.
175   /// This keeps track of how much space is left.
176   llvm::IntrusiveRefCntPtr<RopeRefCountString> AllocBuffer;
177   enum { AllocChunkSize = 4080 };
178   unsigned AllocOffs = AllocChunkSize;
179 
180 public:
181   RewriteRope() = default;
182   RewriteRope(const RewriteRope &RHS) : Chunks(RHS.Chunks) {}
183 
184   using iterator = RopePieceBTree::iterator;
185   using const_iterator = RopePieceBTree::iterator;
186 
187   iterator begin() const { return Chunks.begin(); }
188   iterator end() const { return Chunks.end(); }
189   unsigned size() const { return Chunks.size(); }
190 
191   void clear() {
192     Chunks.clear();
193   }
194 
195   void assign(const char *Start, const char *End) {
196     clear();
197     if (Start != End)
198       Chunks.insert(0, MakeRopeString(Start, End));
199   }
200 
201   void insert(unsigned Offset, const char *Start, const char *End) {
202     assert(Offset <= size() && "Invalid position to insert!");
203     if (Start == End) return;
204     Chunks.insert(Offset, MakeRopeString(Start, End));
205   }
206 
207   void erase(unsigned Offset, unsigned NumBytes) {
208     assert(Offset+NumBytes <= size() && "Invalid region to erase!");
209     if (NumBytes == 0) return;
210     Chunks.erase(Offset, NumBytes);
211   }
212 
213 private:
214   RopePiece MakeRopeString(const char *Start, const char *End);
215 };
216 
217 } // namespace clang
218 
219 #endif // LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H
220