1 /*
2     Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
3 
4     This library is free software; you can redistribute it and/or
5     modify it under the terms of the GNU Library General Public
6     License as published by the Free Software Foundation; either
7     version 2 of the License, or (at your option) any later version.
8 
9     This library is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12     Library General Public License for more details.
13 
14     You should have received a copy of the GNU Library General Public License
15     along with this library; see the file COPYING.LIB.  If not, write to
16     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17     Boston, MA 02110-1301, USA.
18 */
19 
20 #ifndef SegmentedString_h
21 #define SegmentedString_h
22 
23 #include "PlatformString.h"
24 #include <wtf/Deque.h>
25 #include <wtf/text/TextPosition.h>
26 
27 namespace WebCore {
28 
29 class SegmentedString;
30 
31 class SegmentedSubstring {
32 public:
SegmentedSubstring()33     SegmentedSubstring()
34         : m_length(0)
35         , m_current(0)
36         , m_doNotExcludeLineNumbers(true)
37     {
38     }
39 
SegmentedSubstring(const String & str)40     SegmentedSubstring(const String& str)
41         : m_length(str.length())
42         , m_current(str.isEmpty() ? 0 : str.characters())
43         , m_string(str)
44         , m_doNotExcludeLineNumbers(true)
45     {
46     }
47 
clear()48     void clear() { m_length = 0; m_current = 0; }
49 
excludeLineNumbers()50     bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
doNotExcludeLineNumbers()51     bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
52 
setExcludeLineNumbers()53     void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
54 
numberOfCharactersConsumed()55     int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
56 
appendTo(String & str)57     void appendTo(String& str) const
58     {
59         if (m_string.characters() == m_current) {
60             if (str.isEmpty())
61                 str = m_string;
62             else
63                 str.append(m_string);
64         } else
65             str.append(String(m_current, m_length));
66     }
67 
68 public:
69     int m_length;
70     const UChar* m_current;
71 
72 private:
73     String m_string;
74     bool m_doNotExcludeLineNumbers;
75 };
76 
77 class SegmentedString {
78 public:
SegmentedString()79     SegmentedString()
80         : m_pushedChar1(0)
81         , m_pushedChar2(0)
82         , m_currentChar(0)
83         , m_numberOfCharactersConsumedPriorToCurrentString(0)
84         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
85         , m_currentLine(0)
86         , m_closed(false)
87     {
88     }
89 
SegmentedString(const String & str)90     SegmentedString(const String& str)
91         : m_pushedChar1(0)
92         , m_pushedChar2(0)
93         , m_currentString(str)
94         , m_currentChar(m_currentString.m_current)
95         , m_numberOfCharactersConsumedPriorToCurrentString(0)
96         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
97         , m_currentLine(0)
98         , m_closed(false)
99     {
100     }
101 
102     SegmentedString(const SegmentedString&);
103 
104     const SegmentedString& operator=(const SegmentedString&);
105 
106     void clear();
107     void close();
108 
109     void append(const SegmentedString&);
110     void prepend(const SegmentedString&);
111 
excludeLineNumbers()112     bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
113     void setExcludeLineNumbers();
114 
push(UChar c)115     void push(UChar c)
116     {
117         if (!m_pushedChar1) {
118             m_pushedChar1 = c;
119             m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
120         } else {
121             ASSERT(!m_pushedChar2);
122             m_pushedChar2 = c;
123         }
124     }
125 
isEmpty()126     bool isEmpty() const { return !current(); }
127     unsigned length() const;
128 
isClosed()129     bool isClosed() const { return m_closed; }
130 
131     enum LookAheadResult {
132         DidNotMatch,
133         DidMatch,
134         NotEnoughCharacters,
135     };
136 
lookAhead(const String & string)137     LookAheadResult lookAhead(const String& string) { return lookAheadInline<SegmentedString::equalsLiterally>(string); }
lookAheadIgnoringCase(const String & string)138     LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline<SegmentedString::equalsIgnoringCase>(string); }
139 
advance()140     void advance()
141     {
142         if (!m_pushedChar1 && m_currentString.m_length > 1) {
143             --m_currentString.m_length;
144             m_currentChar = ++m_currentString.m_current;
145             return;
146         }
147         advanceSlowCase();
148     }
149 
advanceAndASSERT(UChar expectedCharacter)150     void advanceAndASSERT(UChar expectedCharacter)
151     {
152         ASSERT_UNUSED(expectedCharacter, *current() == expectedCharacter);
153         advance();
154     }
155 
advanceAndASSERTIgnoringCase(UChar expectedCharacter)156     void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
157     {
158         ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(*current()) == WTF::Unicode::foldCase(expectedCharacter));
159         advance();
160     }
161 
advancePastNewline(int & lineNumber)162     void advancePastNewline(int& lineNumber)
163     {
164         ASSERT(*current() == '\n');
165         if (!m_pushedChar1 && m_currentString.m_length > 1) {
166             int newLineFlag = m_currentString.doNotExcludeLineNumbers();
167             lineNumber += newLineFlag;
168             m_currentLine += newLineFlag;
169             if (newLineFlag)
170                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
171             --m_currentString.m_length;
172             m_currentChar = ++m_currentString.m_current;
173             return;
174         }
175         advanceSlowCase(lineNumber);
176     }
177 
advancePastNonNewline()178     void advancePastNonNewline()
179     {
180         ASSERT(*current() != '\n');
181         if (!m_pushedChar1 && m_currentString.m_length > 1) {
182             --m_currentString.m_length;
183             m_currentChar = ++m_currentString.m_current;
184             return;
185         }
186         advanceSlowCase();
187     }
188 
advance(int & lineNumber)189     void advance(int& lineNumber)
190     {
191         if (!m_pushedChar1 && m_currentString.m_length > 1) {
192             int newLineFlag = (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers();
193             lineNumber += newLineFlag;
194             m_currentLine += newLineFlag;
195             if (newLineFlag)
196                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
197             --m_currentString.m_length;
198             m_currentChar = ++m_currentString.m_current;
199             return;
200         }
201         advanceSlowCase(lineNumber);
202     }
203 
204     // Writes the consumed characters into consumedCharacters, which must
205     // have space for at least |count| characters.
206     void advance(unsigned count, UChar* consumedCharacters);
207 
escaped()208     bool escaped() const { return m_pushedChar1; }
209 
numberOfCharactersConsumed()210     int numberOfCharactersConsumed() const
211     {
212         int numberOfPushedCharacters = 0;
213         if (m_pushedChar1) {
214             ++numberOfPushedCharacters;
215             if (m_pushedChar2)
216                 ++numberOfPushedCharacters;
217         }
218         return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
219     }
220 
221     String toString() const;
222 
223     const UChar& operator*() const { return *current(); }
224     const UChar* operator->() const { return current(); }
225 
226 
227     // The method is moderately slow, comparing to currentLine method.
228     WTF::ZeroBasedNumber currentColumn() const;
229     WTF::ZeroBasedNumber currentLine() const;
230     // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
231     // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
232     void setCurrentPosition(WTF::ZeroBasedNumber line, WTF::ZeroBasedNumber columnAftreProlog, int prologLength);
233 
234 private:
235     void append(const SegmentedSubstring&);
236     void prepend(const SegmentedSubstring&);
237 
238     void advanceSlowCase();
239     void advanceSlowCase(int& lineNumber);
240     void advanceSubstring();
current()241     const UChar* current() const { return m_currentChar; }
242 
equalsLiterally(const UChar * str1,const UChar * str2,size_t count)243     static bool equalsLiterally(const UChar* str1, const UChar* str2, size_t count) { return !memcmp(str1, str2, count * sizeof(UChar)); }
equalsIgnoringCase(const UChar * str1,const UChar * str2,size_t count)244     static bool equalsIgnoringCase(const UChar* str1, const UChar* str2, size_t count) { return !WTF::Unicode::umemcasecmp(str1, str2, count); }
245 
246     template<bool equals(const UChar* str1, const UChar* str2, size_t count)>
lookAheadInline(const String & string)247     inline LookAheadResult lookAheadInline(const String& string)
248     {
249         if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
250             if (equals(string.characters(), m_currentString.m_current, string.length()))
251                 return DidMatch;
252             return DidNotMatch;
253         }
254         return lookAheadSlowCase<equals>(string);
255     }
256 
257     template<bool equals(const UChar* str1, const UChar* str2, size_t count)>
lookAheadSlowCase(const String & string)258     LookAheadResult lookAheadSlowCase(const String& string)
259     {
260         unsigned count = string.length();
261         if (count > length())
262             return NotEnoughCharacters;
263         UChar* consumedCharacters;
264         String consumedString = String::createUninitialized(count, consumedCharacters);
265         advance(count, consumedCharacters);
266         LookAheadResult result = DidNotMatch;
267         if (equals(string.characters(), consumedCharacters, count))
268             result = DidMatch;
269         prepend(SegmentedString(consumedString));
270         return result;
271     }
272 
isComposite()273     bool isComposite() const { return !m_substrings.isEmpty(); }
274 
275     UChar m_pushedChar1;
276     UChar m_pushedChar2;
277     SegmentedSubstring m_currentString;
278     const UChar* m_currentChar;
279     int m_numberOfCharactersConsumedPriorToCurrentString;
280     int m_numberOfCharactersConsumedPriorToCurrentLine;
281     int m_currentLine;
282     Deque<SegmentedSubstring> m_substrings;
283     bool m_closed;
284 };
285 
286 }
287 
288 #endif
289