1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=2 sw=2 et tw=78: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef mozilla_ImportScanner_h 8 #define mozilla_ImportScanner_h 9 10 /* A simple best-effort scanner for @import rules for the HTML parser */ 11 12 #include "nsString.h" 13 #include "nsTArray.h" 14 15 namespace mozilla { 16 17 struct ImportScanner final { 18 ImportScanner() = default; 19 20 // Called when a <style> element starts. 21 // 22 // Note that this function cannot make assumptions about the internal state, 23 // as you can nest <svg:style> elements. 24 void Start(); 25 26 // Called when a <style> element ends. Returns the list of URLs scanned. 27 nsTArray<nsString> Stop(); 28 29 // Whether Scan() should be called. ShouldScanfinal30 bool ShouldScan() const { 31 return mState != State::OutsideOfStyleElement && mState != State::Done; 32 } 33 34 // Scan() should be called when text content is parsed, and returns an array 35 // of found URLs, if any. 36 // 37 // Asserts ShouldScan() returns true. 38 nsTArray<nsString> Scan(Span<const char16_t> aFragment); 39 40 private: 41 enum class State { 42 // Initial state, doesn't scan anything until Start() is called. 43 OutsideOfStyleElement, 44 // In an idle state during the stylesheet scanning, either at the 45 // beginning or after parsing a rule. 46 Idle, 47 // We've seen a '/' character, but not the '*' yet, so we don't know if 48 // it's a comment. 49 MaybeAtCommentStart, 50 // We're inside a comment. 51 AtComment, 52 // We've seen a '*' while we're in a comment, but we don't now yet whether 53 // '/' comes afterwards (thus ending the comment). 54 MaybeAtCommentEnd, 55 // We're parsing the '@' rule name. 56 AtRuleName, 57 // We're parsing the '@' rule value. 58 AtRuleValue, 59 // We're parsing the '@' rule value and we've seen the delimiter (quote or 60 // url() function) that encloses the url. 61 AtRuleValueDelimited, 62 // We've seen the url, but haven't seen the ';' finishing the rule yet. 63 AfterRuleValue, 64 // We've seen anything that is not an @import or a @charset rule, and thus 65 // further @import / @charset should not be parsed. 66 Done, 67 }; 68 69 void EmitUrl(); 70 [[nodiscard]] State Scan(char16_t aChar); 71 72 static constexpr const uint32_t kMaxRuleNameLength = 7; // (charset, import) 73 74 State mState = State::OutsideOfStyleElement; 75 nsAutoStringN<kMaxRuleNameLength> mRuleName; 76 nsAutoStringN<128> mRuleValue; 77 nsTArray<nsString> mUrlsFound; 78 79 // This is conceptually part of the AtRuleValue* / AfterRuleValue states, 80 // and serves to differentiate between @import (where we actually care about 81 // the value) and @charset (where we don't). It's just more convenient this 82 // way than having separate states for them. 83 bool mInImportRule = false; 84 // If we're in the AtRuleValueDelimited state, what is the closing character 85 // that will end the value. This is either a parenthesis (for unquoted 86 // urls), or a quote, either single or double. 87 char16_t mUrlValueDelimiterClosingChar = 0; 88 }; 89 90 } // namespace mozilla 91 92 #endif 93