/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 sw=2 et tw=78: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef mozilla_ImportScanner_h
8 #define mozilla_ImportScanner_h
9 
/* A simple best-effort scanner for @import rules for the HTML parser */
11 
12 #include "nsString.h"
13 #include "nsTArray.h"
14 
15 namespace mozilla {
16 
17 struct ImportScanner final {
18   ImportScanner() = default;
19 
20   // Called when a <style> element starts.
21   //
22   // Note that this function cannot make assumptions about the internal state,
23   // as you can nest <svg:style> elements.
24   void Start();
25 
26   // Called when a <style> element ends. Returns the list of URLs scanned.
27   nsTArray<nsString> Stop();
28 
29   // Whether Scan() should be called.
ShouldScanfinal30   bool ShouldScan() const {
31     return mState != State::OutsideOfStyleElement && mState != State::Done;
32   }
33 
34   // Scan() should be called when text content is parsed, and returns an array
35   // of found URLs, if any.
36   //
37   // Asserts ShouldScan() returns true.
38   nsTArray<nsString> Scan(Span<const char16_t> aFragment);
39 
40  private:
41   enum class State {
42     // Initial state, doesn't scan anything until Start() is called.
43     OutsideOfStyleElement,
44     // In an idle state during the stylesheet scanning, either at the
45     // beginning or after parsing a rule.
46     Idle,
47     // We've seen a '/' character, but not the '*' yet, so we don't know if
48     // it's a comment.
49     MaybeAtCommentStart,
50     // We're inside a comment.
51     AtComment,
52     // We've seen a '*' while we're in a comment, but we don't now yet whether
53     // '/' comes afterwards (thus ending the comment).
54     MaybeAtCommentEnd,
55     // We're parsing the '@' rule name.
56     AtRuleName,
57     // We're parsing the '@' rule value.
58     AtRuleValue,
59     // We're parsing the '@' rule value and we've seen the delimiter (quote or
60     // url() function) that encloses the url.
61     AtRuleValueDelimited,
62     // We've seen the url, but haven't seen the ';' finishing the rule yet.
63     AfterRuleValue,
64     // We've seen anything that is not an @import or a @charset rule, and thus
65     // further @import / @charset should not be parsed.
66     Done,
67   };
68 
69   void EmitUrl();
70   [[nodiscard]] State Scan(char16_t aChar);
71 
72   static constexpr const uint32_t kMaxRuleNameLength = 7;  // (charset, import)
73 
74   State mState = State::OutsideOfStyleElement;
75   nsAutoStringN<kMaxRuleNameLength> mRuleName;
76   nsAutoStringN<128> mRuleValue;
77   nsTArray<nsString> mUrlsFound;
78 
79   // This is conceptually part of the AtRuleValue* / AfterRuleValue states,
80   // and serves to differentiate between @import (where we actually care about
81   // the value) and @charset (where we don't). It's just more convenient this
82   // way than having separate states for them.
83   bool mInImportRule = false;
84   // If we're in the AtRuleValueDelimited state, what is the closing character
85   // that will end the value. This is either a parenthesis (for unquoted
86   // urls), or a quote, either single or double.
87   char16_t mUrlValueDelimiterClosingChar = 0;
88 };
89 
90 }  // namespace mozilla
91 
92 #endif
93