1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef __nsCharSeparatedTokenizer_h 8 #define __nsCharSeparatedTokenizer_h 9 10 #include "mozilla/RangedPtr.h" 11 12 #include "nsDependentSubstring.h" 13 #include "nsCRT.h" 14 15 /** 16 * This parses a SeparatorChar-separated string into tokens. 17 * Whitespace surrounding tokens is not treated as part of tokens, however 18 * whitespace inside a token is. If the final token is the empty string, it is 19 * not returned. 20 * 21 * Some examples, with SeparatorChar = ',': 22 * 23 * "foo, bar, baz" -> "foo" "bar" "baz" 24 * "foo,bar,baz" -> "foo" "bar" "baz" 25 * "foo , bar hi , baz" -> "foo" "bar hi" "baz" 26 * "foo, ,bar,baz" -> "foo" "" "bar" "baz" 27 * "foo,,bar,baz" -> "foo" "" "bar" "baz" 28 * "foo,bar,baz," -> "foo" "bar" "baz" 29 * 30 * The function used for whitespace detection is a template argument. 31 * By default, it is NS_IsAsciiWhitespace. 32 */ 33 template<typename DependentSubstringType, bool IsWhitespace(char16_t)> 34 class nsTCharSeparatedTokenizer 35 { 36 typedef typename DependentSubstringType::char_type CharType; 37 typedef typename DependentSubstringType::substring_type SubstringType; 38 39 public: 40 // Flags -- only one for now. If we need more, they should be defined to 41 // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) 42 enum 43 { 44 SEPARATOR_OPTIONAL = 1 45 }; 46 47 nsTCharSeparatedTokenizer(const SubstringType& aSource, 48 CharType aSeparatorChar, 49 uint32_t aFlags = 0) 50 : mIter(aSource.Data(), aSource.Length()) 51 , mEnd(aSource.Data() + aSource.Length(), aSource.Data(), 52 aSource.Length()) 53 , mSeparatorChar(aSeparatorChar) 54 , mWhitespaceBeforeFirstToken(false) 55 , mWhitespaceAfterCurrentToken(false) 56 , mSeparatorAfterCurrentToken(false) 57 , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) 58 { 59 // Skip initial whitespace 60 while (mIter < mEnd && IsWhitespace(*mIter)) { 61 mWhitespaceBeforeFirstToken = true; 62 ++mIter; 63 } 64 } 65 66 /** 67 * Checks if any more tokens are available. 68 */ hasMoreTokens()69 bool hasMoreTokens() const 70 { 71 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), 72 "Should be at beginning of token if there is one"); 73 74 return mIter < mEnd; 75 } 76 77 /* 78 * Returns true if there is whitespace prior to the first token. 79 */ whitespaceBeforeFirstToken()80 bool whitespaceBeforeFirstToken() const 81 { 82 return mWhitespaceBeforeFirstToken; 83 } 84 85 /* 86 * Returns true if there is a separator after the current token. 87 * Useful if you want to check whether the last token has a separator 88 * after it which may not be valid. 89 */ separatorAfterCurrentToken()90 bool separatorAfterCurrentToken() const 91 { 92 return mSeparatorAfterCurrentToken; 93 } 94 95 /* 96 * Returns true if there is any whitespace after the current token. 97 */ whitespaceAfterCurrentToken()98 bool whitespaceAfterCurrentToken() const 99 { 100 return mWhitespaceAfterCurrentToken; 101 } 102 103 /** 104 * Returns the next token. 105 */ nextToken()106 const DependentSubstringType nextToken() 107 { 108 mozilla::RangedPtr<const CharType> tokenStart = mIter; 109 mozilla::RangedPtr<const CharType> tokenEnd = mIter; 110 111 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), 112 "Should be at beginning of token if there is one"); 113 114 // Search until we hit separator or end (or whitespace, if a separator 115 // isn't required -- see clause with 'break' below). 116 while (mIter < mEnd && *mIter != mSeparatorChar) { 117 // Skip to end of the current word. 118 while (mIter < mEnd && 119 !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { 120 ++mIter; 121 } 122 tokenEnd = mIter; 123 124 // Skip whitespace after the current word. 125 mWhitespaceAfterCurrentToken = false; 126 while (mIter < mEnd && IsWhitespace(*mIter)) { 127 mWhitespaceAfterCurrentToken = true; 128 ++mIter; 129 } 130 if (mSeparatorOptional) { 131 // We've hit (and skipped) whitespace, and that's sufficient to end 132 // our token, regardless of whether we've reached a SeparatorChar. 133 break; 134 } // (else, we'll keep looping until we hit mEnd or SeparatorChar) 135 } 136 137 mSeparatorAfterCurrentToken = (mIter != mEnd && 138 *mIter == mSeparatorChar); 139 MOZ_ASSERT(mSeparatorOptional || 140 (mSeparatorAfterCurrentToken == (mIter < mEnd)), 141 "If we require a separator and haven't hit the end of " 142 "our string, then we shouldn't have left the loop " 143 "unless we hit a separator"); 144 145 // Skip separator (and any whitespace after it), if we're at one. 146 if (mSeparatorAfterCurrentToken) { 147 ++mIter; 148 149 while (mIter < mEnd && IsWhitespace(*mIter)) { 150 mWhitespaceAfterCurrentToken = true; 151 ++mIter; 152 } 153 } 154 155 return Substring(tokenStart.get(), tokenEnd.get()); 156 } 157 158 private: 159 mozilla::RangedPtr<const CharType> mIter; 160 const mozilla::RangedPtr<const CharType> mEnd; 161 CharType mSeparatorChar; 162 bool mWhitespaceBeforeFirstToken; 163 bool mWhitespaceAfterCurrentToken; 164 bool mSeparatorAfterCurrentToken; 165 bool mSeparatorOptional; 166 }; 167 168 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> 169 class nsCharSeparatedTokenizerTemplate 170 : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace> 171 { 172 public: 173 nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource, 174 char16_t aSeparatorChar, 175 uint32_t aFlags = 0) 176 : nsTCharSeparatedTokenizer<nsDependentSubstring, 177 IsWhitespace>(aSource, aSeparatorChar, aFlags) 178 { 179 } 180 }; 181 182 typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer; 183 184 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> 185 class nsCCharSeparatedTokenizerTemplate 186 : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace> 187 { 188 public: 189 nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource, 190 char aSeparatorChar, 191 uint32_t aFlags = 0) 192 : nsTCharSeparatedTokenizer<nsDependentCSubstring, 193 IsWhitespace>(aSource, aSeparatorChar, aFlags) 194 { 195 } 196 }; 197 198 typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer; 199 200 #endif /* __nsCharSeparatedTokenizer_h */ 201