1 /*
2     Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3     Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4     Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Library General Public
8     License as published by the Free Software Foundation; either
9     version 2 of the License, or (at your option) any later version.
10 
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14     Library General Public License for more details.
15 
16     You should have received a copy of the GNU Library General Public License
17     along with this library; see the file COPYING.LIB.  If not, write to
18     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19     Boston, MA 02110-1301, USA.
20 
21 */
22 
23 #ifndef TextResourceDecoder_h
24 #define TextResourceDecoder_h
25 
26 #include "TextEncoding.h"
27 
28 namespace WebCore {
29 
30 class HTMLMetaCharsetParser;
31 
32 class TextResourceDecoder : public RefCounted<TextResourceDecoder> {
33 public:
34     enum EncodingSource {
35         DefaultEncoding,
36         AutoDetectedEncoding,
37         EncodingFromXMLHeader,
38         EncodingFromMetaTag,
39         EncodingFromCSSCharset,
40         EncodingFromHTTPHeader,
41         UserChosenEncoding,
42         EncodingFromParentFrame
43     };
44 
45     static PassRefPtr<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding(), bool usesEncodingDetector = false)
46     {
47         return adoptRef(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
48     }
49     ~TextResourceDecoder();
50 
51     void setEncoding(const TextEncoding&, EncodingSource);
encoding()52     const TextEncoding& encoding() const { return m_encoding; }
53 
54     String decode(const char* data, size_t length);
55     String flush();
56 
setHintEncoding(const TextResourceDecoder * hintDecoder)57     void setHintEncoding(const TextResourceDecoder* hintDecoder)
58     {
59         // hintEncoding is for use with autodetection, which should be
60         // only invoked when hintEncoding comes from auto-detection.
61         if (hintDecoder && hintDecoder->m_source == AutoDetectedEncoding)
62             m_hintEncoding = hintDecoder->encoding().name();
63     }
64 
useLenientXMLDecoding()65     void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
sawError()66     bool sawError() const { return m_sawError; }
67 
68 private:
69     TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding,
70                         bool usesEncodingDetector);
71 
72     enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
73     static ContentType determineContentType(const String& mimeType);
74     static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding);
75 
76     size_t checkForBOM(const char*, size_t);
77     bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
78     bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer);
79     bool checkForMetaCharset(const char*, size_t);
80     void detectJapaneseEncoding(const char*, size_t);
81     bool shouldAutoDetect() const;
82 
83     ContentType m_contentType;
84     TextEncoding m_encoding;
85     OwnPtr<TextCodec> m_codec;
86     EncodingSource m_source;
87     const char* m_hintEncoding;
88     Vector<char> m_buffer;
89     bool m_checkedForBOM;
90     bool m_checkedForCSSCharset;
91     bool m_checkedForHeadCharset;
92     bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
93     bool m_sawError;
94     bool m_usesEncodingDetector;
95 
96     OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
97 };
98 
99 }
100 
101 #endif
102