1 /* 2 * Copyright (C) 2010 Google Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef HTMLMetaCharsetParser_h 27 #define HTMLMetaCharsetParser_h 28 29 #include "HTMLToken.h" 30 #include "SegmentedString.h" 31 #include "TextEncoding.h" 32 #include <wtf/Noncopyable.h> 33 34 namespace WebCore { 35 36 class HTMLTokenizer; 37 class TextCodec; 38 39 class HTMLMetaCharsetParser { 40 WTF_MAKE_NONCOPYABLE(HTMLMetaCharsetParser); 41 public: create()42 static PassOwnPtr<HTMLMetaCharsetParser> create() { return adoptPtr(new HTMLMetaCharsetParser()); } 43 44 ~HTMLMetaCharsetParser(); 45 46 // Returns true if done checking, regardless whether an encoding is found. 47 bool checkForMetaCharset(const char*, size_t); 48 encoding()49 const TextEncoding& encoding() { return m_encoding; } 50 51 typedef Vector<pair<String, String> > AttributeList; 52 // The returned encoding might not be valid. 53 static TextEncoding encodingFromMetaAttributes(const AttributeList& 54 ); 55 56 private: 57 HTMLMetaCharsetParser(); 58 59 bool processMeta(); 60 static String extractCharset(const String&); 61 62 enum Mode { 63 None, 64 Charset, 65 Pragma, 66 }; 67 68 OwnPtr<HTMLTokenizer> m_tokenizer; 69 OwnPtr<TextCodec> m_assumedCodec; 70 SegmentedString m_input; 71 HTMLToken m_token; 72 bool m_inHeadSection; 73 74 bool m_doneChecking; 75 TextEncoding m_encoding; 76 }; 77 78 } 79 #endif 80