2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public License
17 along with this library; see the file COPYING.LIB. If not, write to
18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 Boston, MA 02110-1301, USA.
23 #ifndef TextResourceDecoder_h
24 #define TextResourceDecoder_h
26 #include "TextEncoding.h"
// Decoder object deriving from RefCounted<TextResourceDecoder>. It exposes
// decode(data, length), which returns a String, and keeps a TextEncoding
// alongside an EncodingSource value. New instances are handed out by create(),
// which returns a PassRefPtr.
30 class TextResourceDecoder : public RefCounted<TextResourceDecoder> {
// Enumerators that, going by their names, identify where an encoding was
// obtained: an XML header, a CSS charset rule, an HTTP header, or the parent
// frame.
// NOTE(review): the enum's opening line and some enumerators are not visible
// in this excerpt (AutoDetectedEncoding is used by setHintEncoding() but is
// not listed here) - confirm the full list and its order against the
// complete header.
35 EncodingFromXMLHeader,
37 EncodingFromCSSCharset,
38 EncodingFromHTTPHeader,
40 EncodingFromParentFrame
43 static PassRefPtr<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding(), bool usesEncodingDetector = false)
45 return adoptRef(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
// Destructor; only declared here, the definition is not part of this excerpt.
47 ~TextResourceDecoder();
// Supplies an encoding together with the EncodingSource it is attributed to.
// NOTE(review): the definition is not visible here - presumably it updates
// m_encoding and m_source; confirm in the implementation file.
49 void setEncoding(const TextEncoding&, EncodingSource);
50 const TextEncoding& encoding() const { return m_encoding; }
// Takes a pointer to raw bytes and their count and returns the decoded text
// as a String. Only the declaration is visible here.
52 String decode(const char* data, size_t length);
55 void setHintEncoding(const TextResourceDecoder* hintDecoder)
57 // hintEncoding is for use with autodetection, which should be
58 // only invoked when hintEncoding comes from auto-detection.
59 if (hintDecoder->m_source == AutoDetectedEncoding)
60 m_hintEncoding = hintDecoder->encoding().name();
63 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
64 bool sawError() const { return m_sawError; }
// Constructor taking the same three arguments that create() forwards to it:
// the MIME type, the default encoding, and whether the encoding detector is
// used. Only the declaration is visible here.
67 TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding,
68 bool usesEncodingDetector);
// Content categories the decoder distinguishes between.
70 enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
// Maps a MIME type string to one of the ContentType values.
// NOTE(review): the mapping rules are in the implementation, not shown here.
71 static ContentType determineContentType(const String& mimeType);
// Returns the default TextEncoding for a content type, given a supplied
// default encoding.
// NOTE(review): how the two inputs are combined is not visible in this header.
72 static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding);
// Helper routines operating on a (pointer, length) byte range. Only the
// declarations are visible; the notes below are read off names and signatures.

// BOM check over the given bytes.
// NOTE(review): the meaning of the size_t result is not shown here
// (presumably the BOM length) - confirm in the implementation.
74 size_t checkForBOM(const char*, size_t);
// Charset check for CSS content; movedDataToBuffer is an out-parameter.
// NOTE(review): presumably reports whether the input was appended to m_buffer.
75 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
// Charset check in the document head; same out-parameter convention as above.
76 bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer);
// Japanese encoding detection over the given bytes; returns nothing, so any
// result is presumably recorded in member state - confirm.
77 void detectJapaneseEncoding(const char*, size_t);
// Predicate for whether auto-detection should be attempted; const, so it does
// not modify the decoder.
78 bool shouldAutoDetect() const;
// Data members.
// NOTE(review): this excerpt skips some original lines; m_sawError, which
// sawError() returns, is not listed below - confirm against the full header.

// Content category of the resource being decoded.
80 ContentType m_contentType;
// Encoding returned by encoding().
81 TextEncoding m_encoding;
// Owned TextCodec instance.
82 OwnPtr<TextCodec> m_codec;
// EncodingSource associated with m_encoding; compared against
// AutoDetectedEncoding in setHintEncoding().
83 EncodingSource m_source;
// Encoding name assigned by setHintEncoding(). This is a raw, non-owning
// pointer, so the string it points to must outlive this object.
84 const char* m_hintEncoding;
// Byte buffer.
// NOTE(review): presumably holds input retained while charset checks are
// pending (see the movedDataToBuffer out-parameters) - confirm.
85 Vector<char> m_buffer;
// Flags named after the CSS / head charset checks.
// NOTE(review): presumably set once the corresponding check has run.
87 bool m_checkedForCSSCharset;
88 bool m_checkedForHeadCharset;
89 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
// Whether the encoding detector is used; matches the constructor argument of
// the same name.
91 bool m_usesEncodingDetector;