2009-03-26 Jungshik Shin <jshin@chromium.org>
author eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 27 Mar 2009 00:01:58 +0000 (00:01 +0000)
committer eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 27 Mar 2009 00:01:58 +0000 (00:01 +0000)
        Reviewed by Alexey Proskuryakov.

        Add an encoding detector 'hook' to TextResourceDecoder.
         - add |usesEncodingDetector| to Settings. It's off by default.
         - add a new encoding source type |EncodingFromParentFrame|
           to distinguish cases when the encoding is inherited from
           the parent frame from cases when it's actually auto-detected.
         - add TextEncodingDetector* to platform/text. Currently,
           the only implementation uses ICU. Stub is added for
           ports that do not use ICU and a build on Tiger. Mac OS
           Tiger comes with ICU 3.2 that does not support encoding detector.
         - add ucsdet.h to icu/unicode for ports using ICU.

        Layout tests will be added once bug 20534 is fixed and a WebPreference
        can be controlled in test_shell. With UsesEncodingDetector off, there
        is no change to test.

        http://bugs.webkit.org/show_bug.cgi?id=16482

        * WebCore.vcproj/WebCore.vcproj:
        * WebCore.xcodeproj/project.pbxproj:
        * WebCore/GNUmakefile.am
        * WebCore/WebCore.pro
        * WebCore/WebCore.scons
        * WebCore/WebCoreSources.bkl
        * WebCore.base.exp
        * icu/unicode/ucsdet.h
        * loader/FrameLoader.cpp:
        (WebCore::FrameLoader::write): turn on/off encoding detector depending on Settings and set hintEncoding of TextResourceDecoder to the parent frame encoding if necessary and the security policy allows referring to the parent frame encoding.
        * loader/TextResourceDecoder.cpp:
        (WebCore::TextResourceDecoder::TextResourceDecoder):
        (WebCore::TextResourceDecoder::checkForCSSCharset): add check for EncodingFromParentFrame
        (WebCore::TextResourceDecoder::checkForHeadCharset): ditto
        (WebCore::TextResourceDecoder::shouldAutoDetect):
        (WebCore::TextResourceDecoder::decode): call encoding detector if shouldAutoDetect returns true.
        (WebCore::TextResourceDecoder::flush): if encoding is not determined by now, call encoding detector if shouldAutoDetect returns true.
        * loader/TextResourceDecoder.h:
        (WebCore::TextResourceDecoder::):
        (WebCore::TextResourceDecoder::create):
        (WebCore::TextResourceDecoder::setHintEncoding):
        (WebCore::TextResourceDecoder::source):
        * page/Settings.cpp: add m_usesEncodingDetector
        (WebCore::Settings::Settings):
        (WebCore::Settings::setUsesEncodingDetector):
        * page/Settings.h:
        (WebCore::Settings::usesEncodingDetector):
        * platform/text/TextEncodingDetector.h: Added.
        * platform/text/TextEncodingDetectorICU.cpp: Added.
        (WebCore::detectTextEncoding):
        * platform/text/TextEncodingDetectorNone.cpp: Added.
        (WebCore::detectTextEncoding):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@42022 268f45cc-cd09-0410-ab3c-d52691b4dbfc

18 files changed:
WebCore/ChangeLog
WebCore/GNUmakefile.am
WebCore/WebCore.base.exp
WebCore/WebCore.pro
WebCore/WebCore.scons
WebCore/WebCore.vcproj/WebCore.vcproj
WebCore/WebCore.xcodeproj/project.pbxproj
WebCore/WebCoreSources.bkl
WebCore/loader/FrameLoader.cpp
WebCore/loader/TextResourceDecoder.cpp
WebCore/loader/TextResourceDecoder.h
WebCore/page/Settings.cpp
WebCore/page/Settings.h
WebKit/mac/ChangeLog
WebKit/mac/WebView/WebPreferenceKeysPrivate.h
WebKit/mac/WebView/WebPreferences.mm
WebKit/mac/WebView/WebPreferencesPrivate.h
WebKit/mac/WebView/WebView.mm

index 25689cb..7f2c8f9 100644 (file)
@@ -1,3 +1,57 @@
+2009-03-26  Jungshik Shin  <jshin@chromium.org>
+
+        Reviewed by Alexey Proskuryakov.
+
+        Add an encoding detector 'hook' to TextResourceDecoder.
+         - add |usesEncodingDetector| to Settings. It's off by default.
+         - add a new encoding source type |EncodingFromParentFrame|
+           to distinguish cases when the encoding is inherited from
+           the parent frame from cases when it's actually auto-detected.
+         - add TextEncodingDetector* to platform/text. Currently,
+           the only implementation uses ICU. Stub is added for 
+           ports that do not use ICU and a build on Tiger. Mac OS 
+           Tiger comes with ICU 3.2 that does not support encoding detector.
+         - add ucsdet.h to icu/unicode for ports using ICU.
+
+        Layout tests will be added once bug 20534 is fixed and a WebPreference
+        can be controlled in test_shell. With UsesEncodingDetector off, there
+        is no change to test. 
+
+        http://bugs.webkit.org/show_bug.cgi?id=16482
+
+        * WebCore.vcproj/WebCore.vcproj:
+        * WebCore.xcodeproj/project.pbxproj:
+        * WebCore/GNUmakefile.am
+        * WebCore/WebCore.pro
+        * WebCore/WebCore.scons
+        * WebCore/WebCoreSources.bkl
+        * WebCore.base.exp
+        * icu/unicode/ucsdet.h
+        * loader/FrameLoader.cpp:
+        (WebCore::FrameLoader::write): turn on/off encoding detector depending on Settings and set hintEncoding of TextResourceDecoder to the parent frame encoding if necessary and the security policy allows referring to the parent frame encoding.
+        * loader/TextResourceDecoder.cpp:
+        (WebCore::TextResourceDecoder::TextResourceDecoder):
+        (WebCore::TextResourceDecoder::checkForCSSCharset): add check for EncodingFromParentFrame
+        (WebCore::TextResourceDecoder::checkForHeadCharset): ditto
+        (WebCore::TextResourceDecoder::shouldAutoDetect):
+        (WebCore::TextResourceDecoder::decode): call encoding detector if shouldAutoDetect returns true.
+        (WebCore::TextResourceDecoder::flush): if encoding is not determined by now, call encoding detector if shouldAutoDetect returns true.
+        * loader/TextResourceDecoder.h:
+        (WebCore::TextResourceDecoder::):
+        (WebCore::TextResourceDecoder::create):
+        (WebCore::TextResourceDecoder::setHintEncoding):
+        (WebCore::TextResourceDecoder::source):
+        * page/Settings.cpp: add m_usesEncodingDetector
+        (WebCore::Settings::Settings):
+        (WebCore::Settings::setUsesEncodingDetector):
+        * page/Settings.h:
+        (WebCore::Settings::usesEncodingDetector):
+        * platform/text/TextEncodingDetector.h: Added.
+        * platform/text/TextEncodingDetectorICU.cpp: Added.
+        (WebCore::detectTextEncoding):
+        * platform/text/TextEncodingDetectorNone.cpp: Added.
+        (WebCore::detectTextEncoding):
+
 2009-03-26  Darin Adler  <darin@apple.com>
 
         Reviewed by Geoff Garen.
index 13d2c1f..2a25e6a 100644 (file)
@@ -1496,6 +1496,8 @@ webcore_sources += \
        WebCore/platform/text/TextDirection.h \
        WebCore/platform/text/TextEncoding.cpp \
        WebCore/platform/text/TextEncoding.h \
+       WebCore/platform/text/TextEncodingDetector.h \
+       WebCore/platform/text/TextEncodingDetectorNone.cpp \
        WebCore/platform/text/TextEncodingRegistry.cpp \
        WebCore/platform/text/TextEncodingRegistry.h \
        WebCore/platform/text/TextStream.cpp \
index ef9b05f..cae0464 100644 (file)
@@ -409,7 +409,7 @@ __ZN7WebCore19SelectionController9selectAllEv
 __ZN7WebCore19SelectionControllerC1EPNS_5FrameEb
 __ZN7WebCore19TextResourceDecoder5flushEv
 __ZN7WebCore19TextResourceDecoder6decodeEPKcm
-__ZN7WebCore19TextResourceDecoderC1ERKNS_6StringERKNS_12TextEncodingE
+__ZN7WebCore19TextResourceDecoderC1ERKNS_6StringERKNS_12TextEncodingEb
 __ZN7WebCore19TextResourceDecoderD1Ev
 __ZN7WebCore20ResourceResponseBase24setExpectedContentLengthEx
 __ZN7WebCore21ContextMenuController16clearContextMenuEv
@@ -603,6 +603,7 @@ __ZN7WebCore8Settings22setShowsURLsInToolTipsEb
 __ZN7WebCore8Settings23setDefaultFixedFontSizeEi
 __ZN7WebCore8Settings23setEditableLinkBehaviorENS_20EditableLinkBehaviorE
 __ZN7WebCore8Settings23setNeedsTigerMailQuirksEb
+__ZN7WebCore8Settings23setUsesEncodingDetectorEb
 __ZN7WebCore8Settings24setApplicationChromeModeEb
 __ZN7WebCore8Settings24setTextAreasAreResizableEb
 __ZN7WebCore8Settings25setDeveloperExtrasEnabledEb
index dabf837..e992828 100644 (file)
@@ -963,6 +963,7 @@ SOURCES += \
     platform/text/TextCodecUserDefined.cpp \
     platform/text/TextCodecUTF16.cpp \
     platform/text/TextEncoding.cpp \
+    platform/text/TextEncodingDetectorNone.cpp \
     platform/text/TextEncodingRegistry.cpp \
     platform/text/TextStream.cpp \
     platform/ThreadGlobalData.cpp \
index 355f7ff..914cd57 100644 (file)
@@ -840,6 +840,7 @@ sources['platform/text'] = [
     'platform/text/TextCodecUserDefined.cpp',
     'platform/text/TextCodecUTF16.cpp',
     'platform/text/TextEncoding.cpp',
+    'platform/text/TextEncodingDetectorICU.cpp',
     'platform/text/TextEncodingRegistry.cpp',
     'platform/text/TextStream.cpp',
     'platform/text/UnicodeRange.cpp',
index 270e48b..45aa72e 100644 (file)
                                        >\r
                                </File>\r
                                <File\r
+                                       RelativePath="..\platform\text\TextEncodingDetectorICU.cpp"\r
+                                       >\r
+                               </File>\r
+                               <File\r
+                                       RelativePath="..\platform\text\TextEncodingDetector.h"\r
+                                       >\r
+                               </File>\r
+                               <File\r
                                        RelativePath="..\platform\text\TextEncodingRegistry.cpp"\r
                                        >\r
                                </File>\r
index 0e50d3f..c8a7394 100644 (file)
                C091588E0DB4209200E55AF4 /* JSQuarantinedObjectWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C09158880DB4209200E55AF4 /* JSQuarantinedObjectWrapper.cpp */; };
                C091588F0DB4209200E55AF4 /* JSQuarantinedObjectWrapper.h in Headers */ = {isa = PBXBuildFile; fileRef = C09158890DB4209200E55AF4 /* JSQuarantinedObjectWrapper.h */; };
                C0DFC8700DB6841A003EAE7C /* JSConsoleCustom.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C0DFC86F0DB6841A003EAE7C /* JSConsoleCustom.cpp */; };
+               C105DA620F3AA68F001DD44F /* TextEncodingDetectorICU.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C105DA610F3AA68F001DD44F /* TextEncodingDetectorICU.cpp */; };
+               C105DA640F3AA6B8001DD44F /* TextEncodingDetector.h in Headers */ = {isa = PBXBuildFile; fileRef = C105DA630F3AA6B8001DD44F /* TextEncodingDetector.h */; };
                C6D74AD509AA282E000B0A52 /* ModifySelectionListLevel.h in Headers */ = {isa = PBXBuildFile; fileRef = C6D74AD309AA282E000B0A52 /* ModifySelectionListLevel.h */; };
                C6D74AE409AA290A000B0A52 /* ModifySelectionListLevel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C6D74AE309AA290A000B0A52 /* ModifySelectionListLevel.cpp */; };
                D05CED290A40BB2C00C5AF38 /* FormatBlockCommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D05CED270A40BB2C00C5AF38 /* FormatBlockCommand.cpp */; };
                C09158880DB4209200E55AF4 /* JSQuarantinedObjectWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSQuarantinedObjectWrapper.cpp; sourceTree = "<group>"; };
                C09158890DB4209200E55AF4 /* JSQuarantinedObjectWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSQuarantinedObjectWrapper.h; sourceTree = "<group>"; };
                C0DFC86F0DB6841A003EAE7C /* JSConsoleCustom.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSConsoleCustom.cpp; sourceTree = "<group>"; };
+               C105DA610F3AA68F001DD44F /* TextEncodingDetectorICU.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TextEncodingDetectorICU.cpp; sourceTree = "<group>"; };
+               C105DA630F3AA6B8001DD44F /* TextEncodingDetector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TextEncodingDetector.h; sourceTree = "<group>"; };
                C6D74AD309AA282E000B0A52 /* ModifySelectionListLevel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ModifySelectionListLevel.h; sourceTree = "<group>"; };
                C6D74AE309AA290A000B0A52 /* ModifySelectionListLevel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ModifySelectionListLevel.cpp; sourceTree = "<group>"; };
                D05CED270A40BB2C00C5AF38 /* FormatBlockCommand.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = FormatBlockCommand.cpp; sourceTree = "<group>"; };
                                B2C3DA150D006C1D00EF6F26 /* TextDirection.h */,
                                B2C3DA160D006C1D00EF6F26 /* TextEncoding.cpp */,
                                B2C3DA170D006C1D00EF6F26 /* TextEncoding.h */,
+                               C105DA630F3AA6B8001DD44F /* TextEncodingDetector.h */,
+                               C105DA610F3AA68F001DD44F /* TextEncodingDetectorICU.cpp */,
                                B2C3DA180D006C1D00EF6F26 /* TextEncodingRegistry.cpp */,
                                B2C3DA190D006C1D00EF6F26 /* TextEncodingRegistry.h */,
                                B2C3DA1A0D006C1D00EF6F26 /* TextStream.cpp */,
                                B2C3DA450D006C1D00EF6F26 /* TextDirection.h in Headers */,
                                1A6938020A11100A00C127FE /* TextDocument.h in Headers */,
                                B2C3DA470D006C1D00EF6F26 /* TextEncoding.h in Headers */,
+                               C105DA640F3AA6B8001DD44F /* TextEncodingDetector.h in Headers */,
                                B2C3DA490D006C1D00EF6F26 /* TextEncodingRegistry.h in Headers */,
                                933A14300B7D188600A53FFD /* TextEvent.h in Headers */,
                                93309E18099E64920056E581 /* TextGranularity.h in Headers */,
                                AB014DE30E689A4300E10445 /* TextControlInnerElements.cpp in Sources */,
                                1A6938010A11100A00C127FE /* TextDocument.cpp in Sources */,
                                B2C3DA460D006C1D00EF6F26 /* TextEncoding.cpp in Sources */,
+                               C105DA620F3AA68F001DD44F /* TextEncodingDetectorICU.cpp in Sources */,
                                B2C3DA480D006C1D00EF6F26 /* TextEncodingRegistry.cpp in Sources */,
                                933A142E0B7D188600A53FFD /* TextEvent.cpp in Sources */,
                                93309E1B099E64920056E581 /* TextIterator.cpp in Sources */,
index 2dc3cfd..dfc3094 100644 (file)
@@ -780,6 +780,7 @@ This file contains the list of files needed to build WebCore.
         platform/text/TextCodecUTF16.cpp
         platform/text/TextCodecUserDefined.cpp
         platform/text/TextEncoding.cpp
+        platform/text/TextEncodingDetectorICU.cpp
         platform/text/TextEncodingRegistry.cpp
         platform/text/TextStream.cpp
         platform/ThreadGlobalData.cpp
index aa7129f..edd04b0 100644 (file)
@@ -253,6 +253,11 @@ static int numRequests(Document* document)
     return document->docLoader()->requestCount();
 }
 
+static inline bool canReferToParentFrameEncoding(const Frame* frame, const Frame* parentFrame) 
+{
+    return parentFrame && parentFrame->document()->securityOrigin()->canAccess(frame->document()->securityOrigin());
+}
+
 FrameLoader::FrameLoader(Frame* frame, FrameLoaderClient* client)
     : m_frame(frame)
     , m_client(client)
@@ -997,12 +1002,28 @@ void FrameLoader::write(const char* str, int len, bool flush)
     }
     
     if (!m_decoder) {
-        Settings* settings = m_frame->settings();
-        m_decoder = TextResourceDecoder::create(m_responseMIMEType, settings ? settings->defaultTextEncodingName() : String());
-        if (m_encoding.isEmpty()) {
+        if (Settings* settings = m_frame->settings()) {
+            m_decoder = TextResourceDecoder::create(m_responseMIMEType,
+                settings->defaultTextEncodingName(),
+                settings->usesEncodingDetector());
             Frame* parentFrame = m_frame->tree()->parent();
-            if (parentFrame && parentFrame->document()->securityOrigin()->canAccess(m_frame->document()->securityOrigin()))
-                m_decoder->setEncoding(parentFrame->document()->inputEncoding(), TextResourceDecoder::DefaultEncoding);
+            // Set the hint encoding to the parent frame encoding only if
+            // the parent and the current frames share the security origin.
+            // We impose this condition because somebody can make a child frame 
+            // containing a carefully crafted html/javascript in one encoding
+            // that can be mistaken for hintEncoding (or related encoding) by
+            // an auto detector. When interpreted in the latter, it could be
+            // an attack vector.
+            // FIXME: This might be too cautious for non-7bit-encodings and
+            // we may consider relaxing this later after testing.
+            if (canReferToParentFrameEncoding(m_frame, parentFrame))
+                m_decoder->setHintEncoding(parentFrame->document()->decoder());
+        } else
+            m_decoder = TextResourceDecoder::create(m_responseMIMEType, String());
+        Frame* parentFrame = m_frame->tree()->parent();
+        if (m_encoding.isEmpty()) {
+            if (canReferToParentFrameEncoding(m_frame, parentFrame))
+                m_decoder->setEncoding(parentFrame->document()->inputEncoding(), TextResourceDecoder::EncodingFromParentFrame);
         } else {
             m_decoder->setEncoding(m_encoding,
                 m_encodingWasChosenByUser ? TextResourceDecoder::UserChosenEncoding : TextResourceDecoder::EncodingFromHTTPHeader);
index 8f78ef4..ee81326 100644 (file)
@@ -26,6 +26,8 @@
 #include "DOMImplementation.h"
 #include "HTMLNames.h"
 #include "TextCodec.h"
+#include "TextEncoding.h"
+#include "TextEncodingDetector.h"
 #include "TextEncodingRegistry.h"
 #include <wtf/ASCIICType.h>
 #include <wtf/StringExtras.h>
@@ -321,15 +323,17 @@ const TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType
     return specifiedDefaultEncoding;
 }
 
-TextResourceDecoder::TextResourceDecoder(const String& mimeType, const TextEncoding& specifiedDefaultEncoding)
+TextResourceDecoder::TextResourceDecoder(const String& mimeType, const TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector)
     : m_contentType(determineContentType(mimeType))
     , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding))
     , m_source(DefaultEncoding)
+    , m_hintEncoding(0)
     , m_checkedForBOM(false)
     , m_checkedForCSSCharset(false)
     , m_checkedForHeadCharset(false)
     , m_useLenientXMLDecoding(false)
     , m_sawError(false)
+    , m_usesEncodingDetector(usesEncodingDetector)
 {
 }
 
@@ -451,7 +455,7 @@ size_t TextResourceDecoder::checkForBOM(const char* data, size_t len)
 
 bool TextResourceDecoder::checkForCSSCharset(const char* data, size_t len, bool& movedDataToBuffer)
 {
-    if (m_source != DefaultEncoding) {
+    if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) {
         m_checkedForCSSCharset = true;
         return true;
     }
@@ -532,7 +536,7 @@ const int bytesToCheckUnconditionally = 1024; // That many input bytes will be c
 
 bool TextResourceDecoder::checkForHeadCharset(const char* data, size_t len, bool& movedDataToBuffer)
 {
-    if (m_source != DefaultEncoding) {
+    if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) {
         m_checkedForHeadCharset = true;
         return true;
     }
@@ -759,6 +763,23 @@ void TextResourceDecoder::detectJapaneseEncoding(const char* data, size_t len)
     }
 }
 
+// We use the encoding detector in two cases:
+//   1. Encoding detector is turned ON and no other encoding source is
+//      available (that is, it's DefaultEncoding).
+//   2. Encoding detector is turned ON and the encoding is set to
+//      the encoding of the parent frame, which is also auto-detected.
+//   Note that condition #2 is NOT satisfied unless parent-child frame
+//   relationship is compliant to the same-origin policy. If they're from
+//   different domains, |m_source| would not be set to EncodingFromParentFrame
+//   in the first place. 
+bool TextResourceDecoder::shouldAutoDetect() const
+{
+    // Just checking m_hintEncoding suffices here because it's only set
+    // in setHintEncoding when the source is AutoDetectedEncoding.
+    return m_usesEncodingDetector
+        && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); 
+}
+
 String TextResourceDecoder::decode(const char* data, size_t len)
 {
     size_t lengthOfBOM = 0;
@@ -775,10 +796,24 @@ String TextResourceDecoder::decode(const char* data, size_t len)
         if (!checkForHeadCharset(data, len, movedDataToBuffer))
             return "";
 
-    // Do the auto-detect if our default encoding is one of the Japanese ones.
-    // FIXME: It seems wrong to change our encoding downstream after we have already done some decoding.
+    // FIXME: It seems wrong to change our encoding downstream after
+    // we have already done some decoding. However, it's not possible
+    // to avoid in a sense in two cases below because triggering conditions
+    // for both cases depend on the information that won't be available
+    // until we do partial read. 
+    // The first case had better be removed altogether (see bug 21990)
+    // or at least be made to be invoked only when the encoding detection
+    // is turned on. 
+    // Do the auto-detect 1) using the Japanese detector if our default encoding
+    // is one of the Japanese encodings or 2) using detectTextEncoding if encoding
+    // detection is turned on.
     if (m_source != UserChosenEncoding && m_source != AutoDetectedEncoding && m_encoding.isJapanese())
         detectJapaneseEncoding(data, len);
+    else if (shouldAutoDetect()) {
+        TextEncoding detectedEncoding;
+        if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
+            setEncoding(detectedEncoding, AutoDetectedEncoding);
+    }
 
     ASSERT(m_encoding.isValid());
 
@@ -801,6 +836,17 @@ String TextResourceDecoder::decode(const char* data, size_t len)
 
 String TextResourceDecoder::flush()
 {
+   // If we cannot identify the encoding even after a document is completely
+   // loaded, we need to detect the encoding if the other conditions for
+   // autodetection are satisfied.
+    if (m_buffer.size() && shouldAutoDetect()
+        && ((!m_checkedForHeadCharset && (m_contentType == HTML || m_contentType == XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) {
+         TextEncoding detectedEncoding;
+         if (detectTextEncoding(m_buffer.data(), m_buffer.size(),
+                                m_hintEncoding, &detectedEncoding))
+             setEncoding(detectedEncoding, AutoDetectedEncoding);
+    }
+
     if (!m_codec)
         m_codec.set(newTextCodec(m_encoding).release());
 
index bd68933..368741f 100644 (file)
@@ -36,12 +36,13 @@ public:
         EncodingFromMetaTag,
         EncodingFromCSSCharset,
         EncodingFromHTTPHeader,
-        UserChosenEncoding
+        UserChosenEncoding,
+        EncodingFromParentFrame
     };
 
-    static PassRefPtr<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding())
+    static PassRefPtr<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding(), bool usesEncodingDetector = false)
     {
-        return adoptRef(new TextResourceDecoder(mimeType, defaultEncoding));
+        return adoptRef(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
     }
     ~TextResourceDecoder();
 
@@ -51,11 +52,20 @@ public:
     String decode(const char* data, size_t length);
     String flush();
 
+    void setHintEncoding(const TextResourceDecoder* hintDecoder)
+    {
+        // The hint is used only by autodetection, so adopt it only when a hint decoder
+        // was actually supplied (callers may pass null) and its encoding was auto-detected.
+        if (hintDecoder && hintDecoder->m_source == AutoDetectedEncoding)
+            m_hintEncoding = hintDecoder->encoding().name();
+    }
+   
     void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
     bool sawError() const { return m_sawError; }
 
 private:
-    TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding);
+    TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding,
+                        bool usesEncodingDetector);
 
     enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
     static ContentType determineContentType(const String& mimeType);
@@ -65,17 +75,20 @@ private:
     bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
     bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer);
     void detectJapaneseEncoding(const char*, size_t);
+    bool shouldAutoDetect() const;
 
     ContentType m_contentType;
     TextEncoding m_encoding;
     OwnPtr<TextCodec> m_codec;
     EncodingSource m_source;
+    const char* m_hintEncoding;
     Vector<char> m_buffer;
     bool m_checkedForBOM;
     bool m_checkedForCSSCharset;
     bool m_checkedForHeadCharset;
     bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
     bool m_sawError;
+    bool m_usesEncodingDetector;
 };
 
 }
index 567477e..e124ef6 100644 (file)
@@ -87,6 +87,7 @@ Settings::Settings(Page* page)
     , m_shouldPaintCustomScrollbars(false)
     , m_zoomsTextOnly(false)
     , m_enforceCSSMIMETypeInStrictMode(true)
+    , m_usesEncodingDetector(false)
     , m_maximumDecodedImageSize(std::numeric_limits<size_t>::max())
     , m_allowScriptsToCloseWindows(false)
 {
@@ -424,6 +425,11 @@ void Settings::setShouldPaintNativeControls(bool shouldPaintNativeControls)
 }
 #endif
 
+void Settings::setUsesEncodingDetector(bool usesEncodingDetector)
+{
+    m_usesEncodingDetector = usesEncodingDetector;
+}
+
 void Settings::setAllowScriptsToCloseWindows(bool allowScriptsToCloseWindows)
 {
     m_allowScriptsToCloseWindows = allowScriptsToCloseWindows;
index 7e8b597..47dab1e 100644 (file)
@@ -115,6 +115,9 @@ namespace WebCore {
         
         void setDefaultTextEncodingName(const String&);
         const String& defaultTextEncodingName() const { return m_defaultTextEncodingName; }
+        
+        void setUsesEncodingDetector(bool);
+        bool usesEncodingDetector() const { return m_usesEncodingDetector; }
 
         void setUserStyleSheetLocation(const KURL&);
         const KURL& userStyleSheetLocation() const { return m_userStyleSheetLocation; }
@@ -264,6 +267,7 @@ namespace WebCore {
         bool m_shouldPaintCustomScrollbars : 1;
         bool m_zoomsTextOnly : 1;
         bool m_enforceCSSMIMETypeInStrictMode : 1;
+        bool m_usesEncodingDetector : 1;
         size_t m_maximumDecodedImageSize;
         bool m_allowScriptsToCloseWindows : 1;
 
index ead7c18..dbf2427 100644 (file)
@@ -1,3 +1,20 @@
+2009-03-26  Jungshik Shin  <jshin@chromium.org>
+
+        Reviewed by Alexey Proskuryakov.
+
+        Add WebPreferences for encoding autodetection on Mac.
+  
+        http://bugs.webkit.org/show_bug.cgi?id=16482
+
+        * WebView/WebPreferenceKeysPrivate.h:
+        * WebView/WebPreferences.mm:
+        (+[WebPreferences initialize]):
+        (-[WebPreferences usesEncodingDetector]):
+        (-[WebPreferences setUsesEncodingDetector:]):
+        * WebView/WebPreferencesPrivate.h:
+        * WebView/WebView.mm:
+        (-[WebView _preferencesChangedNotification:]):
+
 2009-03-26  Anders Carlsson  <andersca@apple.com>
 
         Reviewed by Dan Bernstein.
index cb77609..e22113c 100644 (file)
@@ -41,6 +41,7 @@
 #define WebKitDefaultFontSizePreferenceKey @"WebKitDefaultFontSize"
 #define WebKitDefaultFixedFontSizePreferenceKey @"WebKitDefaultFixedFontSize"
 #define WebKitDefaultTextEncodingNamePreferenceKey @"WebKitDefaultTextEncodingName"
+#define WebKitUsesEncodingDetectorPreferenceKey @"WebKitUsesEncodingDetector"
 #define WebKitUserStyleSheetEnabledPreferenceKey @"WebKitUserStyleSheetEnabledPreferenceKey"
 #define WebKitUserStyleSheetLocationPreferenceKey @"WebKitUserStyleSheetLocationPreferenceKey"
 #define WebKitShouldPrintBackgroundsPreferenceKey @"WebKitShouldPrintBackgroundsPreferenceKey"
index 25b6460..6db14bd 100644 (file)
@@ -306,6 +306,7 @@ static WebCacheModel cacheModelForMainBundle(void)
         @"16",                          WebKitDefaultFontSizePreferenceKey,
         @"13",                          WebKitDefaultFixedFontSizePreferenceKey,
         @"ISO-8859-1",                  WebKitDefaultTextEncodingNamePreferenceKey,
+        [NSNumber numberWithBool:NO],   WebKitUsesEncodingDetectorPreferenceKey,
         [NSNumber numberWithBool:NO],   WebKitUserStyleSheetEnabledPreferenceKey,
         @"",                            WebKitUserStyleSheetLocationPreferenceKey,
         [NSNumber numberWithBool:NO],   WebKitShouldPrintBackgroundsPreferenceKey,
@@ -864,6 +865,16 @@ static WebCacheModel cacheModelForMainBundle(void)
     _private->automaticallyDetectsCacheModel = automaticallyDetectsCacheModel;
 }
 
+- (BOOL)usesEncodingDetector
+{
+    return [self _boolValueForKey: WebKitUsesEncodingDetectorPreferenceKey];
+}
+
+- (void)setUsesEncodingDetector:(BOOL)flag
+{
+    [self _setBoolValue: flag forKey: WebKitUsesEncodingDetectorPreferenceKey];
+}
+
 - (BOOL)isWebSecurityEnabled
 {
     return [self _boolValueForKey: WebKitWebSecurityEnabledPreferenceKey];
index e1d2a13..ae94cce 100644 (file)
@@ -59,6 +59,9 @@ extern NSString *WebPreferencesRemovedNotification;
 - (BOOL)applicationChromeModeEnabled;
 - (void)setApplicationChromeModeEnabled:(BOOL)flag;
 
+- (BOOL)usesEncodingDetector;
+- (void)setUsesEncodingDetector:(BOOL)flag;
+
 - (BOOL)respectStandardStyleKeyEquivalents;
 - (void)setRespectStandardStyleKeyEquivalents:(BOOL)flag;
 
index 5a81783..de3628a 100644 (file)
@@ -1332,6 +1332,7 @@ static bool runningTigerMail()
     settings->setDefaultFixedFontSize([preferences defaultFixedFontSize]);
     settings->setDefaultFontSize([preferences defaultFontSize]);
     settings->setDefaultTextEncodingName([preferences defaultTextEncodingName]);
+    settings->setUsesEncodingDetector([preferences usesEncodingDetector]);
     settings->setFantasyFontFamily([preferences fantasyFontFamily]);
     settings->setFixedFontFamily([preferences fixedFontFamily]);
     settings->setForceFTPDirectoryListings([preferences _forceFTPDirectoryListings]);