1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef __nsCharSeparatedTokenizer_h
8 #define __nsCharSeparatedTokenizer_h
9 
10 #include "mozilla/RangedPtr.h"
11 
12 #include "nsDependentSubstring.h"
13 #include "nsCRT.h"
14 
15 /**
16  * This parses a SeparatorChar-separated string into tokens.
17  * Whitespace surrounding tokens is not treated as part of tokens, however
18  * whitespace inside a token is. If the final token is the empty string, it is
19  * not returned.
20  *
21  * Some examples, with SeparatorChar = ',':
22  *
23  * "foo, bar, baz" ->      "foo" "bar" "baz"
24  * "foo,bar,baz" ->        "foo" "bar" "baz"
25  * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
26  * "foo, ,bar,baz" ->      "foo" "" "bar" "baz"
27  * "foo,,bar,baz" ->       "foo" "" "bar" "baz"
28  * "foo,bar,baz," ->       "foo" "bar" "baz"
29  *
30  * The function used for whitespace detection is a template argument.
31  * By default, it is NS_IsAsciiWhitespace.
32  */
33 template<typename DependentSubstringType, bool IsWhitespace(char16_t)>
34 class nsTCharSeparatedTokenizer
35 {
36   typedef typename DependentSubstringType::char_type CharType;
37   typedef typename DependentSubstringType::substring_type SubstringType;
38 
39 public:
40   // Flags -- only one for now. If we need more, they should be defined to
41   // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
42   enum
43   {
44     SEPARATOR_OPTIONAL = 1
45   };
46 
47   nsTCharSeparatedTokenizer(const SubstringType& aSource,
48                             CharType aSeparatorChar,
49                             uint32_t aFlags = 0)
50     : mIter(aSource.Data(), aSource.Length())
51     , mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
52            aSource.Length())
53     , mSeparatorChar(aSeparatorChar)
54     , mWhitespaceBeforeFirstToken(false)
55     , mWhitespaceAfterCurrentToken(false)
56     , mSeparatorAfterCurrentToken(false)
57     , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
58   {
59     // Skip initial whitespace
60     while (mIter < mEnd && IsWhitespace(*mIter)) {
61       mWhitespaceBeforeFirstToken = true;
62       ++mIter;
63     }
64   }
65 
66   /**
67    * Checks if any more tokens are available.
68    */
hasMoreTokens()69   bool hasMoreTokens() const
70   {
71     MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
72                "Should be at beginning of token if there is one");
73 
74     return mIter < mEnd;
75   }
76 
77   /*
78    * Returns true if there is whitespace prior to the first token.
79    */
whitespaceBeforeFirstToken()80   bool whitespaceBeforeFirstToken() const
81   {
82     return mWhitespaceBeforeFirstToken;
83   }
84 
85   /*
86    * Returns true if there is a separator after the current token.
87    * Useful if you want to check whether the last token has a separator
88    * after it which may not be valid.
89    */
separatorAfterCurrentToken()90   bool separatorAfterCurrentToken() const
91   {
92     return mSeparatorAfterCurrentToken;
93   }
94 
95   /*
96    * Returns true if there is any whitespace after the current token.
97    */
whitespaceAfterCurrentToken()98   bool whitespaceAfterCurrentToken() const
99   {
100     return mWhitespaceAfterCurrentToken;
101   }
102 
103   /**
104    * Returns the next token.
105    */
nextToken()106   const DependentSubstringType nextToken()
107   {
108     mozilla::RangedPtr<const CharType> tokenStart = mIter;
109     mozilla::RangedPtr<const CharType> tokenEnd = mIter;
110 
111     MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
112                "Should be at beginning of token if there is one");
113 
114     // Search until we hit separator or end (or whitespace, if a separator
115     // isn't required -- see clause with 'break' below).
116     while (mIter < mEnd && *mIter != mSeparatorChar) {
117       // Skip to end of the current word.
118       while (mIter < mEnd &&
119              !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
120         ++mIter;
121       }
122       tokenEnd = mIter;
123 
124       // Skip whitespace after the current word.
125       mWhitespaceAfterCurrentToken = false;
126       while (mIter < mEnd && IsWhitespace(*mIter)) {
127         mWhitespaceAfterCurrentToken = true;
128         ++mIter;
129       }
130       if (mSeparatorOptional) {
131         // We've hit (and skipped) whitespace, and that's sufficient to end
132         // our token, regardless of whether we've reached a SeparatorChar.
133         break;
134       } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
135     }
136 
137     mSeparatorAfterCurrentToken = (mIter != mEnd &&
138                                    *mIter == mSeparatorChar);
139     MOZ_ASSERT(mSeparatorOptional ||
140                (mSeparatorAfterCurrentToken == (mIter < mEnd)),
141                "If we require a separator and haven't hit the end of "
142                "our string, then we shouldn't have left the loop "
143                "unless we hit a separator");
144 
145     // Skip separator (and any whitespace after it), if we're at one.
146     if (mSeparatorAfterCurrentToken) {
147       ++mIter;
148 
149       while (mIter < mEnd && IsWhitespace(*mIter)) {
150         mWhitespaceAfterCurrentToken = true;
151         ++mIter;
152       }
153     }
154 
155     return Substring(tokenStart.get(), tokenEnd.get());
156   }
157 
158 private:
159   mozilla::RangedPtr<const CharType> mIter;
160   const mozilla::RangedPtr<const CharType> mEnd;
161   CharType mSeparatorChar;
162   bool mWhitespaceBeforeFirstToken;
163   bool mWhitespaceAfterCurrentToken;
164   bool mSeparatorAfterCurrentToken;
165   bool mSeparatorOptional;
166 };
167 
168 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
169 class nsCharSeparatedTokenizerTemplate
170   : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace>
171 {
172 public:
173   nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,
174                                    char16_t aSeparatorChar,
175                                    uint32_t aFlags = 0)
176     : nsTCharSeparatedTokenizer<nsDependentSubstring,
177                                 IsWhitespace>(aSource, aSeparatorChar, aFlags)
178   {
179   }
180 };
181 
182 typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer;
183 
184 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
185 class nsCCharSeparatedTokenizerTemplate
186   : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace>
187 {
188 public:
189   nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource,
190                                     char aSeparatorChar,
191                                     uint32_t aFlags = 0)
192     : nsTCharSeparatedTokenizer<nsDependentCSubstring,
193                                 IsWhitespace>(aSource, aSeparatorChar, aFlags)
194   {
195   }
196 };
197 
198 typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer;
199 
200 #endif /* __nsCharSeparatedTokenizer_h */
201