Reviewed by Darin.
author mjs <mjs@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 27 Feb 2006 09:17:06 +0000 (09:17 +0000)
committer mjs <mjs@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 27 Feb 2006 09:17:06 +0000 (09:17 +0000)
        - platformize KWQTextCodec
        http://bugzilla.opendarwin.org/show_bug.cgi?id=7487

        I renamed QTextCodec to TextEncoding and QTextDecoder to
        StreamingTextDecoder. I also made a bunch of other changes to make
        the API more sensible.

        * ForwardingHeaders/qtextcodec.h: Removed.
        * WebCore.xcodeproj/project.pbxproj:
        * bridge/mac/MacFrame.mm:
        * bridge/mac/WebCoreFrameBridge.mm:
        (-[WebCoreFrameBridge URLWithAttributeString:]):
        (-[WebCoreFrameBridge textEncoding]):
        (+[WebCoreFrameBridge stringWithData:textEncoding:]):
        (+[WebCoreFrameBridge stringWithData:textEncodingName:]):
        * dom/DocumentImpl.cpp:
        (WebCore::DocumentImpl::completeURL):
        * khtml/ecma/kjs_dom.cpp:
        (KJS::DOMDocument::getValueProperty):
        (KJS::DOMDocument::putValueProperty):
        * khtml/html/FormDataList.cpp:
        (DOM::FormDataList::FormDataList):
        (DOM::FormDataList::appendString):
        * khtml/html/FormDataList.h:
        * khtml/html/HTMLFormElementImpl.cpp:
        (WebCore::HTMLFormElementImpl::HTMLFormElementImpl):
        (WebCore::HTMLFormElementImpl::formData):
        * khtml/html/htmltokenizer.cpp:
        * khtml/misc/decoder.cpp:
        (Decoder::Decoder):
        (Decoder::setEncodingName):
        (Decoder::encodingName):
        (Decoder::decode):
        * khtml/misc/decoder.h:
        (khtml::Decoder::):
        (khtml::Decoder::visuallyOrdered):
        (khtml::Decoder::encoding):
        * khtml/xsl/xslt_processorimpl.cpp:
        (DOM::XSLTProcessorImpl::createDocumentFromSource):
        * kwq/KWQCharsets.cpp: Removed.
        * kwq/KWQCharsets.h: Removed.
        * kwq/KWQTextCodec.cpp: Removed.
        * kwq/KWQTextCodec.h: Removed.
        * kwq/WebCoreTextDecoder.h:
        * kwq/WebCoreTextDecoder.mm:
        * kwq/can-convert.mm: Removed.
        * kwq/character-sets.txt: Removed.
        * kwq/mac-encodings.txt: Removed.
        * kwq/make-charset-table.pl: Removed.
        * loader/CachedCSSStyleSheet.cpp:
        (WebCore::CachedCSSStyleSheet::CachedCSSStyleSheet):
        (WebCore::CachedCSSStyleSheet::setCharset):
        (WebCore::CachedCSSStyleSheet::data):
        * loader/CachedCSSStyleSheet.h:
        * loader/CachedScript.cpp:
        (WebCore::CachedScript::CachedScript):
        (WebCore::CachedScript::setCharset):
        (WebCore::CachedScript::data):
        * loader/CachedScript.h:
        * loader/CachedXSLStyleSheet.cpp:
        (WebCore::CachedXSLStyleSheet::setCharset):
        * page/Frame.cpp:
        (WebCore::Frame::write):
        (WebCore::Frame::gotoAnchor):
        (WebCore::Frame::encoding):
        (WebCore::Frame::backslashAsCurrencySymbol):
        * platform/CharsetNames.cpp: Added.
        (KXMLCore::):
        (WebCore::EncodingHash::equal):
        (WebCore::EncodingHash::hash):
        (WebCore::buildDictionaries):
        (WebCore::textEncodingIDFromCharsetName):
        (WebCore::charsetNameFromTextEncodingID):
        * platform/CharsetNames.h: Added.
        * platform/KURL.cpp:
        (KURL::KURL):
        (KURL::decode_string):
        (encodeRelativeString):
        * platform/KURL.h:
        * platform/QString.cpp:
        (QString::fromUtf8):
        * platform/StreamingTextDecoder.cpp: Added.
        (WebCore::StreamingTextDecoder::StreamingTextDecoder):
        (WebCore::StreamingTextDecoder::convertLatin1):
        (WebCore::StreamingTextDecoder::~StreamingTextDecoder):
        (WebCore::StreamingTextDecoder::convertUTF16):
        (WebCore::effectiveEncoding):
        (WebCore::StreamingTextDecoder::createICUConverter):
        (WebCore::unwanted):
        (WebCore::StreamingTextDecoder::appendOmittingUnwanted):
        (WebCore::StreamingTextDecoder::convertUsingICU):
        (WebCore::StreamingTextDecoder::convert):
        (WebCore::StreamingTextDecoder::toUnicode):
        * platform/StreamingTextDecoder.h: Added.
        (WebCore::StreamingTextDecoder::convert):
        * platform/TextEncoding.cpp: Added.
        (WebCore::TextEncoding::TextEncoding):
        (WebCore::TextEncoding::name):
        (WebCore::effectiveEncoding):
        (WebCore::TextEncoding::backslashAsCurrencySymbol):
        (WebCore::TextEncoding::toUnicode):
        * platform/TextEncoding.h: Added.
        (WebCore::):
        (WebCore::TextEncoding::):
        (WebCore::TextEncoding::TextEncoding):
        (WebCore::TextEncoding::isValid):
        (WebCore::TextEncoding::usesVisualOrdering):
        (WebCore::TextEncoding::isJapanese):
        (WebCore::TextEncoding::encodingID):
        (WebCore::TextEncoding::flags):
        (WebCore::operator==):
        (WebCore::operator!=):
        * platform/character-sets.txt: Added.
        * platform/mac/ExtraCFEncodings.h: Added.
        * platform/mac/QStringMac.mm:
        * platform/mac/TextEncodingMac.cpp: Added.
        (WebCore::effectiveEncoding):
        (WebCore::TextEncoding::fromUnicode):
        * platform/mac/can-convert.mm: Added.
        * platform/mac/mac-encodings.txt: Added.
        * platform/make-charset-table.pl: Added.
        * platform/win/win-encodings.txt: Added.
        * rendering/render_object.cpp:
        (WebCore::RenderObject::backslashAsCurrencySymbol):
        * xml/xmlhttprequest.cpp:
        (WebCore::XMLHttpRequest::send):
        (WebCore::XMLHttpRequest::slotData):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@13002 268f45cc-cd09-0410-ab3c-d52691b4dbfc

44 files changed:
WebCore/ChangeLog
WebCore/ForwardingHeaders/qtextcodec.h [deleted file]
WebCore/WebCore.xcodeproj/project.pbxproj
WebCore/bridge/mac/MacFrame.mm
WebCore/bridge/mac/WebCoreFrameBridge.mm
WebCore/dom/DocumentImpl.cpp
WebCore/khtml/ecma/kjs_dom.cpp
WebCore/khtml/html/FormDataList.cpp
WebCore/khtml/html/FormDataList.h
WebCore/khtml/html/HTMLFormElementImpl.cpp
WebCore/khtml/html/htmltokenizer.cpp
WebCore/khtml/misc/decoder.cpp
WebCore/khtml/misc/decoder.h
WebCore/khtml/xsl/xslt_processorimpl.cpp
WebCore/kwq/KWQCharsets.cpp [deleted file]
WebCore/kwq/KWQTextCodec.cpp [deleted file]
WebCore/kwq/KWQTextCodec.h [deleted file]
WebCore/kwq/WebCoreTextDecoder.h
WebCore/kwq/WebCoreTextDecoder.mm
WebCore/loader/CachedCSSStyleSheet.cpp
WebCore/loader/CachedCSSStyleSheet.h
WebCore/loader/CachedScript.cpp
WebCore/loader/CachedScript.h
WebCore/loader/CachedXSLStyleSheet.cpp
WebCore/page/Frame.cpp
WebCore/platform/CharsetNames.cpp [new file with mode: 0644]
WebCore/platform/CharsetNames.h [moved from WebCore/kwq/KWQCharsets.h with 71% similarity]
WebCore/platform/KURL.cpp
WebCore/platform/KURL.h
WebCore/platform/QString.cpp
WebCore/platform/StreamingTextDecoder.cpp [new file with mode: 0644]
WebCore/platform/StreamingTextDecoder.h [new file with mode: 0644]
WebCore/platform/TextEncoding.cpp [new file with mode: 0644]
WebCore/platform/TextEncoding.h [new file with mode: 0644]
WebCore/platform/character-sets.txt [moved from WebCore/kwq/character-sets.txt with 100% similarity]
WebCore/platform/mac/ExtraCFEncodings.h [new file with mode: 0644]
WebCore/platform/mac/QStringMac.mm
WebCore/platform/mac/TextEncodingMac.cpp [new file with mode: 0644]
WebCore/platform/mac/can-convert.mm [moved from WebCore/kwq/can-convert.mm with 100% similarity]
WebCore/platform/mac/mac-encodings.txt [moved from WebCore/kwq/mac-encodings.txt with 97% similarity]
WebCore/platform/make-charset-table.pl [moved from WebCore/kwq/make-charset-table.pl with 68% similarity]
WebCore/platform/win/win-encodings.txt [new file with mode: 0644]
WebCore/rendering/render_object.cpp
WebCore/xml/xmlhttprequest.cpp

diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index ba69f39..59445b2 100644 (file)
@@ -1,3 +1,135 @@
+2006-02-26  Maciej Stachowiak  <mjs@apple.com>
+
+        Reviewed by Darin.
+
+        - platformize KWQTextCodec
+        http://bugzilla.opendarwin.org/show_bug.cgi?id=7487
+        
+        I renamed QTextCodec to TextEncoding and QTextDecoder to
+        StreamingTextDecoder. I also made a bunch of other changes to make
+        the API more sensible.
+        
+        * ForwardingHeaders/qtextcodec.h: Removed.
+        * WebCore.xcodeproj/project.pbxproj:
+        * bridge/mac/MacFrame.mm:
+        * bridge/mac/WebCoreFrameBridge.mm:
+        (-[WebCoreFrameBridge URLWithAttributeString:]):
+        (-[WebCoreFrameBridge textEncoding]):
+        (+[WebCoreFrameBridge stringWithData:textEncoding:]):
+        (+[WebCoreFrameBridge stringWithData:textEncodingName:]):
+        * dom/DocumentImpl.cpp:
+        (WebCore::DocumentImpl::completeURL):
+        * khtml/ecma/kjs_dom.cpp:
+        (KJS::DOMDocument::getValueProperty):
+        (KJS::DOMDocument::putValueProperty):
+        * khtml/html/FormDataList.cpp:
+        (DOM::FormDataList::FormDataList):
+        (DOM::FormDataList::appendString):
+        * khtml/html/FormDataList.h:
+        * khtml/html/HTMLFormElementImpl.cpp:
+        (WebCore::HTMLFormElementImpl::HTMLFormElementImpl):
+        (WebCore::HTMLFormElementImpl::formData):
+        * khtml/html/htmltokenizer.cpp:
+        * khtml/misc/decoder.cpp:
+        (Decoder::Decoder):
+        (Decoder::setEncodingName):
+        (Decoder::encodingName):
+        (Decoder::decode):
+        * khtml/misc/decoder.h:
+        (khtml::Decoder::):
+        (khtml::Decoder::visuallyOrdered):
+        (khtml::Decoder::encoding):
+        * khtml/xsl/xslt_processorimpl.cpp:
+        (DOM::XSLTProcessorImpl::createDocumentFromSource):
+        * kwq/KWQCharsets.cpp: Removed.
+        * kwq/KWQCharsets.h: Removed.
+        * kwq/KWQTextCodec.cpp: Removed.
+        * kwq/KWQTextCodec.h: Removed.
+        * kwq/WebCoreTextDecoder.h:
+        * kwq/WebCoreTextDecoder.mm:
+        * kwq/can-convert.mm: Removed.
+        * kwq/character-sets.txt: Removed.
+        * kwq/mac-encodings.txt: Removed.
+        * kwq/make-charset-table.pl: Removed.
+        * loader/CachedCSSStyleSheet.cpp:
+        (WebCore::CachedCSSStyleSheet::CachedCSSStyleSheet):
+        (WebCore::CachedCSSStyleSheet::setCharset):
+        (WebCore::CachedCSSStyleSheet::data):
+        * loader/CachedCSSStyleSheet.h:
+        * loader/CachedScript.cpp:
+        (WebCore::CachedScript::CachedScript):
+        (WebCore::CachedScript::setCharset):
+        (WebCore::CachedScript::data):
+        * loader/CachedScript.h:
+        * loader/CachedXSLStyleSheet.cpp:
+        (WebCore::CachedXSLStyleSheet::setCharset):
+        * page/Frame.cpp:
+        (WebCore::Frame::write):
+        (WebCore::Frame::gotoAnchor):
+        (WebCore::Frame::encoding):
+        (WebCore::Frame::backslashAsCurrencySymbol):
+        * platform/CharsetNames.cpp: Added.
+        (KXMLCore::):
+        (WebCore::EncodingHash::equal):
+        (WebCore::EncodingHash::hash):
+        (WebCore::buildDictionaries):
+        (WebCore::textEncodingIDFromCharsetName):
+        (WebCore::charsetNameFromTextEncodingID):
+        * platform/CharsetNames.h: Added.
+        * platform/KURL.cpp:
+        (KURL::KURL):
+        (KURL::decode_string):
+        (encodeRelativeString):
+        * platform/KURL.h:
+        * platform/QString.cpp:
+        (QString::fromUtf8):
+        * platform/StreamingTextDecoder.cpp: Added.
+        (WebCore::StreamingTextDecoder::StreamingTextDecoder):
+        (WebCore::StreamingTextDecoder::convertLatin1):
+        (WebCore::StreamingTextDecoder::~StreamingTextDecoder):
+        (WebCore::StreamingTextDecoder::convertUTF16):
+        (WebCore::effectiveEncoding):
+        (WebCore::StreamingTextDecoder::createICUConverter):
+        (WebCore::unwanted):
+        (WebCore::StreamingTextDecoder::appendOmittingUnwanted):
+        (WebCore::StreamingTextDecoder::convertUsingICU):
+        (WebCore::StreamingTextDecoder::convert):
+        (WebCore::StreamingTextDecoder::toUnicode):
+        * platform/StreamingTextDecoder.h: Added.
+        (WebCore::StreamingTextDecoder::convert):
+        * platform/TextEncoding.cpp: Added.
+        (WebCore::TextEncoding::TextEncoding):
+        (WebCore::TextEncoding::name):
+        (WebCore::effectiveEncoding):
+        (WebCore::TextEncoding::backslashAsCurrencySymbol):
+        (WebCore::TextEncoding::toUnicode):
+        * platform/TextEncoding.h: Added.
+        (WebCore::):
+        (WebCore::TextEncoding::):
+        (WebCore::TextEncoding::TextEncoding):
+        (WebCore::TextEncoding::isValid):
+        (WebCore::TextEncoding::usesVisualOrdering):
+        (WebCore::TextEncoding::isJapanese):
+        (WebCore::TextEncoding::encodingID):
+        (WebCore::TextEncoding::flags):
+        (WebCore::operator==):
+        (WebCore::operator!=):
+        * platform/character-sets.txt: Added.
+        * platform/mac/ExtraCFEncodings.h: Added.
+        * platform/mac/QStringMac.mm:
+        * platform/mac/TextEncodingMac.cpp: Added.
+        (WebCore::effectiveEncoding):
+        (WebCore::TextEncoding::fromUnicode):
+        * platform/mac/can-convert.mm: Added.
+        * platform/mac/mac-encodings.txt: Added.
+        * platform/make-charset-table.pl: Added.
+        * platform/win/win-encodings.txt: Added.
+        * rendering/render_object.cpp:
+        (WebCore::RenderObject::backslashAsCurrencySymbol):
+        * xml/xmlhttprequest.cpp:
+        (WebCore::XMLHttpRequest::send):
+        (WebCore::XMLHttpRequest::slotData):
+
 2006-02-27  Darin Adler  <darin@apple.com>
 
         - fix Windows build
diff --git a/WebCore/ForwardingHeaders/qtextcodec.h b/WebCore/ForwardingHeaders/qtextcodec.h
deleted file mode 100644 (file)
index d1b0973..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include "KWQTextCodec.h"
index 341880f..09248c9 100644 (file)
                6552E7AB096AA11B0006F248 /* WebCoreFrameNamespaces.m in Sources */ = {isa = PBXBuildFile; fileRef = 6552E7A9096AA11B0006F248 /* WebCoreFrameNamespaces.m */; };
                65743B52097076F8001E7CEF /* RenderSVGText.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 65743B50097076F8001E7CEF /* RenderSVGText.cpp */; };
                65743B53097076F8001E7CEF /* RenderSVGText.h in Headers */ = {isa = PBXBuildFile; fileRef = 65743B51097076F8001E7CEF /* RenderSVGText.h */; };
+               6576F9D609B2484A000041F7 /* TextEncodingMac.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6576F9D509B2484A000041F7 /* TextEncodingMac.cpp */; };
+               657BD74D09AFDC54005A2056 /* StreamingTextDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 657BD74909AFDC54005A2056 /* StreamingTextDecoder.cpp */; };
+               657BD74E09AFDC54005A2056 /* StreamingTextDecoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 657BD74A09AFDC54005A2056 /* StreamingTextDecoder.h */; };
+               657BD74F09AFDC54005A2056 /* TextEncoding.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 657BD74B09AFDC54005A2056 /* TextEncoding.cpp */; };
+               657BD75009AFDC54005A2056 /* TextEncoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 657BD74C09AFDC54005A2056 /* TextEncoding.h */; };
                6582A13F09999CD600BEEB6D /* QString.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6582A13D09999CD600BEEB6D /* QString.cpp */; };
                6582A14009999CD600BEEB6D /* QString.h in Headers */ = {isa = PBXBuildFile; fileRef = 6582A13E09999CD600BEEB6D /* QString.h */; };
                6582A15709999D6D00BEEB6D /* ColorMac.mm in Sources */ = {isa = PBXBuildFile; fileRef = 6582A14909999D6C00BEEB6D /* ColorMac.mm */; };
                65CBFEF90974F607001DAC25 /* FrameView.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 65CBFEF70974F607001DAC25 /* FrameView.cpp */; };
                65CBFEFA0974F607001DAC25 /* FrameView.h in Headers */ = {isa = PBXBuildFile; fileRef = 65CBFEF80974F607001DAC25 /* FrameView.h */; };
                65D1C1CA09932B22000CB324 /* Plugin.h in Headers */ = {isa = PBXBuildFile; fileRef = 65D1C1C909932B22000CB324 /* Plugin.h */; };
+               65F537D909B2B20F00F3DC4A /* CharsetNames.h in Headers */ = {isa = PBXBuildFile; fileRef = 65F537D809B2B20F00F3DC4A /* CharsetNames.h */; };
+               65F5382209B2B55700F3DC4A /* character-sets.txt in Resources */ = {isa = PBXBuildFile; fileRef = 65F5382009B2B55700F3DC4A /* character-sets.txt */; };
+               65F5382309B2B55700F3DC4A /* make-charset-table.pl in Resources */ = {isa = PBXBuildFile; fileRef = 65F5382109B2B55700F3DC4A /* make-charset-table.pl */; };
+               65F5382509B2B57500F3DC4A /* mac-encodings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 65F5382409B2B57500F3DC4A /* mac-encodings.txt */; };
+               65F5386909B2BFDA00F3DC4A /* ExtraCFEncodings.h in Headers */ = {isa = PBXBuildFile; fileRef = 65F5386809B2BFDA00F3DC4A /* ExtraCFEncodings.h */; };
+               65F5386B09B2C05E00F3DC4A /* CharsetNames.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 65F5386A09B2C05E00F3DC4A /* CharsetNames.cpp */; };
                65FEA86909833ADE00BED4AB /* Page.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 65FEA86809833ADE00BED4AB /* Page.cpp */; };
                7E6FEED80898582300C44C3F /* WebCoreScriptDebugger.h in Headers */ = {isa = PBXBuildFile; fileRef = 7E6FEED60898582300C44C3F /* WebCoreScriptDebugger.h */; settings = {ATTRIBUTES = (Private, ); }; };
                7E6FEED90898582300C44C3F /* WebCoreScriptDebugger.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7E6FEED70898582300C44C3F /* WebCoreScriptDebugger.mm */; };
                93F1992008245E59001E9ABC /* dom_xmlimpl.h in Headers */ = {isa = PBXBuildFile; fileRef = F523D30002DE4476018635CA /* dom_xmlimpl.h */; };
                93F1992108245E59001E9ABC /* xml_tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = F523D30A02DE4476018635CA /* xml_tokenizer.h */; };
                93F1992908245E59001E9ABC /* KWQCString.h in Headers */ = {isa = PBXBuildFile; fileRef = F587868302DE3B8601EA4122 /* KWQCString.h */; };
-               93F1992A08245E59001E9ABC /* KWQCharsets.h in Headers */ = {isa = PBXBuildFile; fileRef = F58784CC02DE375901EA4122 /* KWQCharsets.h */; };
                93F1992C08245E59001E9ABC /* KWQCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = F587868002DE3B8601EA4122 /* KWQCollection.h */; };
                93F1992E08245E59001E9ABC /* KWQComboBox.h in Headers */ = {isa = PBXBuildFile; fileRef = F587868202DE3B8601EA4122 /* KWQComboBox.h */; };
                93F1992F08245E59001E9ABC /* Cursor.h in Headers */ = {isa = PBXBuildFile; fileRef = F587868402DE3B8601EA4122 /* Cursor.h */; };
                93F1998F08245E59001E9ABC /* KWQSignal.h in Headers */ = {isa = PBXBuildFile; fileRef = F5117BC902F9FFEF018635CE /* KWQSignal.h */; };
                93F1999108245E59001E9ABC /* KWQSlot.h in Headers */ = {isa = PBXBuildFile; fileRef = F5117BCB02F9FFEF018635CE /* KWQSlot.h */; };
                93F1999708245E59001E9ABC /* KWQTextArea.h in Headers */ = {isa = PBXBuildFile; fileRef = F587853502DE375901EA4122 /* KWQTextArea.h */; };
-               93F1999808245E59001E9ABC /* KWQTextCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = F58786BD02DE3B8601EA4122 /* KWQTextCodec.h */; };
                93F1999908245E59001E9ABC /* KWQTextEdit.h in Headers */ = {isa = PBXBuildFile; fileRef = F58786BE02DE3B8601EA4122 /* KWQTextEdit.h */; };
                93F1999A08245E59001E9ABC /* KWQTextField.h in Headers */ = {isa = PBXBuildFile; fileRef = F587851B02DE375901EA4122 /* KWQTextField.h */; };
                93F1999B08245E59001E9ABC /* KWQTextStream.h in Headers */ = {isa = PBXBuildFile; fileRef = F58786BF02DE3B8601EA4122 /* KWQTextStream.h */; };
                93F19A0908245E59001E9ABC /* southWestResizeCursor.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 84C0FA2C052B966D000440A1 /* southWestResizeCursor.tiff */; };
                93F19A0A08245E59001E9ABC /* waitCursor.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 84C0FA2D052B966D000440A1 /* waitCursor.tiff */; };
                93F19A0B08245E59001E9ABC /* westResizeCursor.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 84C0FA2E052B966D000440A1 /* westResizeCursor.tiff */; };
-               93F19A1308245E59001E9ABC /* KWQCharsets.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F58784CD02DE375901EA4122 /* KWQCharsets.cpp */; };
                93F19A1708245E59001E9ABC /* KWQComboBox.mm in Sources */ = {isa = PBXBuildFile; fileRef = F58784D202DE375901EA4122 /* KWQComboBox.mm */; };
                93F19A1808245E59001E9ABC /* KWQCString.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F58784D502DE375901EA4122 /* KWQCString.cpp */; };
                93F19A1E08245E59001E9ABC /* KWQFont.mm in Sources */ = {isa = PBXBuildFile; fileRef = F58784E002DE375901EA4122 /* KWQFont.mm */; };
                93F19A4208245E59001E9ABC /* KWQPainter.mm in Sources */ = {isa = PBXBuildFile; fileRef = F587851F02DE375901EA4122 /* KWQPainter.mm */; };
                93F19A4A08245E59001E9ABC /* KWQRegExp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F587852B02DE375901EA4122 /* KWQRegExp.cpp */; };
                93F19A5108245E59001E9ABC /* KWQTextArea.mm in Sources */ = {isa = PBXBuildFile; fileRef = F587853602DE375901EA4122 /* KWQTextArea.mm */; };
-               93F19A5208245E59001E9ABC /* KWQTextCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F587853702DE375901EA4122 /* KWQTextCodec.cpp */; };
                93F19A5308245E59001E9ABC /* KWQTextEdit.mm in Sources */ = {isa = PBXBuildFile; fileRef = F587853802DE375901EA4122 /* KWQTextEdit.mm */; };
                93F19A5408245E59001E9ABC /* KWQTextStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F587853902DE375901EA4122 /* KWQTextStream.cpp */; };
                93F19A5608245E59001E9ABC /* KWQValueListImpl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F587853C02DE375901EA4122 /* KWQValueListImpl.cpp */; };
                6552E7A9096AA11B0006F248 /* WebCoreFrameNamespaces.m */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.objc; path = WebCoreFrameNamespaces.m; sourceTree = "<group>"; };
                65743B50097076F8001E7CEF /* RenderSVGText.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = RenderSVGText.cpp; sourceTree = "<group>"; };
                65743B51097076F8001E7CEF /* RenderSVGText.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = RenderSVGText.h; sourceTree = "<group>"; };
+               6576F9D509B2484A000041F7 /* TextEncodingMac.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = TextEncodingMac.cpp; sourceTree = "<group>"; };
+               657BD74909AFDC54005A2056 /* StreamingTextDecoder.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = StreamingTextDecoder.cpp; path = platform/StreamingTextDecoder.cpp; sourceTree = "<group>"; };
+               657BD74A09AFDC54005A2056 /* StreamingTextDecoder.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = StreamingTextDecoder.h; path = platform/StreamingTextDecoder.h; sourceTree = "<group>"; };
+               657BD74B09AFDC54005A2056 /* TextEncoding.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TextEncoding.cpp; path = platform/TextEncoding.cpp; sourceTree = "<group>"; };
+               657BD74C09AFDC54005A2056 /* TextEncoding.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TextEncoding.h; path = platform/TextEncoding.h; sourceTree = "<group>"; };
                6582A13D09999CD600BEEB6D /* QString.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = QString.cpp; path = platform/QString.cpp; sourceTree = "<group>"; };
                6582A13E09999CD600BEEB6D /* QString.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = QString.h; path = platform/QString.h; sourceTree = "<group>"; };
                6582A14909999D6C00BEEB6D /* ColorMac.mm */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.objcpp; path = ColorMac.mm; sourceTree = "<group>"; };
                65CBFEF70974F607001DAC25 /* FrameView.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = FrameView.cpp; sourceTree = "<group>"; };
                65CBFEF80974F607001DAC25 /* FrameView.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = FrameView.h; sourceTree = "<group>"; };
                65D1C1C909932B22000CB324 /* Plugin.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = Plugin.h; sourceTree = "<group>"; };
+               65F537D809B2B20F00F3DC4A /* CharsetNames.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = CharsetNames.h; path = platform/CharsetNames.h; sourceTree = "<group>"; };
+               65F5382009B2B55700F3DC4A /* character-sets.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; name = "character-sets.txt"; path = "platform/character-sets.txt"; sourceTree = "<group>"; };
+               65F5382109B2B55700F3DC4A /* make-charset-table.pl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text.script.perl; name = "make-charset-table.pl"; path = "platform/make-charset-table.pl"; sourceTree = "<group>"; };
+               65F5382409B2B57500F3DC4A /* mac-encodings.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = "mac-encodings.txt"; sourceTree = "<group>"; };
+               65F5386809B2BFDA00F3DC4A /* ExtraCFEncodings.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = ExtraCFEncodings.h; sourceTree = "<group>"; };
+               65F5386A09B2C05E00F3DC4A /* CharsetNames.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CharsetNames.cpp; path = platform/CharsetNames.cpp; sourceTree = "<group>"; };
                65F80697054D9F86008BF776 /* KWQExceptions.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = KWQExceptions.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                65FEA86809833ADE00BED4AB /* Page.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = Page.cpp; sourceTree = "<group>"; };
                7E6FEED60898582300C44C3F /* WebCoreScriptDebugger.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = WebCoreScriptDebugger.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F523D30802DE4476018635CA /* dom2_viewsimpl.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = dom2_viewsimpl.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F523D30902DE4476018635CA /* xml_tokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = xml_tokenizer.cpp; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F523D30A02DE4476018635CA /* xml_tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = xml_tokenizer.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
-               F550D70B02E13281018635CA /* mac-encodings.txt */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = text; path = "mac-encodings.txt"; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
-               F550D70C02E13281018635CA /* make-charset-table.pl */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = text.script.perl; path = "make-charset-table.pl"; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F5517DC2031AB56301A80180 /* WebCoreHistory.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = WebCoreHistory.h; path = kwq/WebCoreHistory.h; sourceTree = SOURCE_ROOT; };
                F5517DC3031AB56301A80180 /* WebCoreHistory.m */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.objc; name = WebCoreHistory.m; path = kwq/WebCoreHistory.m; sourceTree = SOURCE_ROOT; };
                F56234DF03026D7301629B47 /* WebCoreEncodings.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = WebCoreEncodings.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F56234E003026D7301629B47 /* WebCoreEncodings.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = WebCoreEncodings.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F565AE8502ECA583018635CA /* WebCoreSettings.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = WebCoreSettings.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F565AE8602ECA583018635CA /* WebCoreSettings.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = WebCoreSettings.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
-               F58784CC02DE375901EA4122 /* KWQCharsets.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQCharsets.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
-               F58784CD02DE375901EA4122 /* KWQCharsets.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = KWQCharsets.cpp; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58784D202DE375901EA4122 /* KWQComboBox.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = KWQComboBox.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58784D502DE375901EA4122 /* KWQCString.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = KWQCString.cpp; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58784DA02DE375901EA4122 /* KWQDef.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQDef.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F587852B02DE375901EA4122 /* KWQRegExp.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = KWQRegExp.cpp; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F587853502DE375901EA4122 /* KWQTextArea.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQTextArea.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F587853602DE375901EA4122 /* KWQTextArea.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = KWQTextArea.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
-               F587853702DE375901EA4122 /* KWQTextCodec.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = KWQTextCodec.cpp; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F587853802DE375901EA4122 /* KWQTextEdit.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = KWQTextEdit.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F587853902DE375901EA4122 /* KWQTextStream.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = KWQTextStream.cpp; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F587853B02DE375901EA4122 /* KWQValueListImpl.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQValueListImpl.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58786AC02DE3B8601EA4122 /* KWQPtrList.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQPtrList.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58786AD02DE3B8601EA4122 /* KWQPtrQueue.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQPtrQueue.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58786B302DE3B8601EA4122 /* KWQRegExp.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQRegExp.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
-               F58786BD02DE3B8601EA4122 /* KWQTextCodec.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQTextCodec.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58786BE02DE3B8601EA4122 /* KWQTextEdit.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQTextEdit.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58786BF02DE3B8601EA4122 /* KWQTextStream.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQTextStream.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F58786C202DE3B8601EA4122 /* KWQValueList.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = KWQValueList.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F5B2A4FD02E2220F018635CB /* WebCoreCache.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = WebCoreCache.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F5B2A52B02E22573018635CB /* WebCoreJavaScript.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = WebCoreJavaScript.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F5B2A52C02E22573018635CB /* WebCoreJavaScript.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = WebCoreJavaScript.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
-               F5BFAAC10309CDF6018635CE /* character-sets.txt */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = text; path = "character-sets.txt"; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
                F5C2869302846DCD018635CA /* ApplicationServices.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = ApplicationServices.framework; path = /System/Library/Frameworks/ApplicationServices.framework; sourceTree = "<absolute>"; };
                F5C2869402846DCD018635CA /* Carbon.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Carbon.framework; path = /System/Library/Frameworks/Carbon.framework; sourceTree = "<absolute>"; };
                F5C2869502846DCD018635CA /* Cocoa.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Cocoa.framework; path = /System/Library/Frameworks/Cocoa.framework; sourceTree = "<absolute>"; };
                6582A14809999D6C00BEEB6D /* mac */ = {
                        isa = PBXGroup;
                        children = (
+                               65F5386809B2BFDA00F3DC4A /* ExtraCFEncodings.h */,
+                               65F5382409B2B57500F3DC4A /* mac-encodings.txt */,
+                               6576F9D509B2484A000041F7 /* TextEncodingMac.cpp */,
                                6582A14909999D6C00BEEB6D /* ColorMac.mm */,
                                F58784F002DE375901EA4122 /* CursorMac.mm */,
                                6582A14A09999D6C00BEEB6D /* FloatPointMac.mm */,
                BCF1A5BA097832090061A123 /* platform */ = {
                        isa = PBXGroup;
                        children = (
+                               65F5386A09B2C05E00F3DC4A /* CharsetNames.cpp */,
+                               65F5382009B2B55700F3DC4A /* character-sets.txt */,
+                               65F5382109B2B55700F3DC4A /* make-charset-table.pl */,
                                BC6D6E2509AF943500F59759 /* ScrollView.h */,
                                93032CCC09AEC36200F82A18 /* cg */,
                                6582A14809999D6C00BEEB6D /* mac */,
                                93CD4FD80995F9EA007ECC97 /* AtomicString.h */,
                                93CD4FD90995F9EA007ECC97 /* AtomicStringImpl.h */,
                                BC075985098861A400407B48 /* Brush.h */,
+                               65F537D809B2B20F00F3DC4A /* CharsetNames.h */,
                                BCC8CFC80986CD2400140BF2 /* Color.cpp */,
                                BCC8CFC90986CD2400140BF2 /* Color.h */,
                                BCC8CFCA0986CD2400140BF2 /* ColorData.gperf */,
                                BCFB2F40097A24B500BA703D /* SegmentedString.h */,
                                BCFB2E830979FD4F00BA703D /* Shared.h */,
                                93309EA0099EB78C0056E581 /* SharedTimer.h */,
+                               657BD74909AFDC54005A2056 /* StreamingTextDecoder.cpp */,
+                               657BD74A09AFDC54005A2056 /* StreamingTextDecoder.h */,
                                93CD4FDA0995F9EA007ECC97 /* String.cpp */,
                                93CD4FDC0995F9EA007ECC97 /* StringImpl.cpp */,
                                93CD4FDD0995F9EA007ECC97 /* StringImpl.h */,
                                93E62D990985F41600E1B5E3 /* SystemTime.h */,
                                93309EA1099EB78C0056E581 /* Timer.cpp */,
+                               657BD74B09AFDC54005A2056 /* TextEncoding.cpp */,
+                               657BD74C09AFDC54005A2056 /* TextEncoding.h */,
                                9305B24C098F1B6B00C28855 /* Timer.h */,
                                935C476A09AC4D4F00A6AAB4 /* WheelEvent.h */,
                                9380F47109A11AB4001FDB34 /* Widget.cpp */,
                F523D15002DE4271018635CA /* other */ = {
                        isa = PBXGroup;
                        children = (
-                               F5BFAAC10309CDF6018635CE /* character-sets.txt */,
-                               F58784CD02DE375901EA4122 /* KWQCharsets.cpp */,
-                               F58784CC02DE375901EA4122 /* KWQCharsets.h */,
                                2D90660B0665D937006B6F1A /* KWQClipboard.h */,
                                2D90660C0665D937006B6F1A /* KWQClipboard.mm */,
                                F58784DA02DE375901EA4122 /* KWQDef.h */,
                                F587854002DE375901EA4122 /* KWQView.h */,
                                F587854302DE375901EA4122 /* KWQWindowWidget.h */,
                                F587854402DE375901EA4122 /* KWQWindowWidget.mm */,
-                               F550D70B02E13281018635CA /* mac-encodings.txt */,
-                               F550D70C02E13281018635CA /* make-charset-table.pl */,
                        );
                        name = other;
                        sourceTree = "<group>";
                                BC86FB8E061F5C23006BB822 /* KWQSlider.mm */,
                                F5117BCC02F9FFEF018635CE /* KWQSlot.cpp */,
                                F5117BCB02F9FFEF018635CE /* KWQSlot.h */,
-                               F587853702DE375901EA4122 /* KWQTextCodec.cpp */,
-                               F58786BD02DE3B8601EA4122 /* KWQTextCodec.h */,
                                F58786BE02DE3B8601EA4122 /* KWQTextEdit.h */,
                                F587853802DE375901EA4122 /* KWQTextEdit.mm */,
                                F587853902DE375901EA4122 /* KWQTextStream.cpp */,
                                93F1992008245E59001E9ABC /* dom_xmlimpl.h in Headers */,
                                93F1992108245E59001E9ABC /* xml_tokenizer.h in Headers */,
                                93F1992908245E59001E9ABC /* KWQCString.h in Headers */,
-                               93F1992A08245E59001E9ABC /* KWQCharsets.h in Headers */,
                                93F1992C08245E59001E9ABC /* KWQCollection.h in Headers */,
                                93F1992E08245E59001E9ABC /* KWQComboBox.h in Headers */,
                                93F1992F08245E59001E9ABC /* Cursor.h in Headers */,
                                93F1998F08245E59001E9ABC /* KWQSignal.h in Headers */,
                                93F1999108245E59001E9ABC /* KWQSlot.h in Headers */,
                                93F1999708245E59001E9ABC /* KWQTextArea.h in Headers */,
-                               93F1999808245E59001E9ABC /* KWQTextCodec.h in Headers */,
                                93F1999908245E59001E9ABC /* KWQTextEdit.h in Headers */,
                                93F1999A08245E59001E9ABC /* KWQTextField.h in Headers */,
                                93F1999B08245E59001E9ABC /* KWQTextStream.h in Headers */,
                                6593923809AE4346002C531F /* KURL.h in Headers */,
                                BC6D6DD309AF906600F59759 /* Font.h in Headers */,
                                9353673F09AED79200D35CD6 /* ScrollBarMode.h in Headers */,
+                               657BD74E09AFDC54005A2056 /* StreamingTextDecoder.h in Headers */,
+                               657BD75009AFDC54005A2056 /* TextEncoding.h in Headers */,
                                BC6D6E2609AF943500F59759 /* ScrollView.h in Headers */,
+                               65F537D909B2B20F00F3DC4A /* CharsetNames.h in Headers */,
+                               65F5386909B2BFDA00F3DC4A /* ExtraCFEncodings.h in Headers */,
                                935EB36209B2D7DB00F3AF5B /* TransferJobClient.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                                932FC16B0824A901005B3C75 /* kjs_traversal.lut.h */,
                                932FC16D0824A906005B3C75 /* kjs_views.lut.h */,
                                932FC16F0824A90D005B3C75 /* kjs_window.lut.h */,
-                               932FC0D20824A2C4005B3C75 /* KWQCharsetData.c */,
+                               932FC0D20824A2C4005B3C75 /* CharsetData.cpp */,
                                932FC0DA0824A337005B3C75 /* ColorData.c */,
                                932FC38F0824BA30005B3C75 /* tokenizer.cpp */,
                                932FC1710824A913005B3C75 /* JSXMLHttpRequest.lut.h */,
                                BCEA479F097CAAC80094C9E4 /* cssvalues.in in Resources */,
                                BCC8CFCD0986CD2400140BF2 /* ColorData.gperf in Resources */,
                                A7638A92099592C30007E14F /* WKDisplacementMapFilter.cikernel in Resources */,
+                               65F5382209B2B55700F3DC4A /* character-sets.txt in Resources */,
+                               65F5382309B2B55700F3DC4A /* make-charset-table.pl in Resources */,
+                               65F5382509B2B57500F3DC4A /* mac-encodings.txt in Resources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                        shellPath = /bin/sh;
                        shellScript = "gperf -CEot -L ANSI-C -k \"*\" -N findDoctypeEntry -F ,PubIDInfo::eAlmostStandards,PubIDInfo::eAlmostStandards khtml/html/doctypes.gperf > \"$DERIVED_FILE_DIR/doctypes.cpp\"";
                };
-               932FC0D20824A2C4005B3C75 /* KWQCharsetData.c */ = {
+               932FC0D20824A2C4005B3C75 /* CharsetData.cpp */ = {
                        isa = PBXShellScriptBuildPhase;
                        buildActionMask = 2147483647;
                        files = (
                        );
                        inputPaths = (
-                               "kwq/make-charset-table.pl",
-                               "kwq/character-sets.txt",
-                               "kwq/mac-encodings.txt",
+                               "platform/make-charset-table.pl",
+                               "platform/character-sets.txt",
+                               "platform/mac/mac-encodings.txt",
                        );
-                       name = KWQCharsetData.c;
+                       name = CharsetData.cpp;
                        outputPaths = (
-                               "$(DERIVED_FILE_DIR)/KWQCharsetData.c",
+                               "$(DERIVED_FILE_DIR)/CharsetData.cpp",
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        shellPath = /bin/sh;
-                       shellScript = "perl kwq/make-charset-table.pl kwq/character-sets.txt kwq/mac-encodings.txt > \"$DERIVED_FILE_DIR/KWQCharsetData.c\"";
+                       shellScript = "perl platform/make-charset-table.pl platform/character-sets.txt platform/mac/mac-encodings.txt kCFStringEncoding > \"$DERIVED_FILE_DIR/CharsetData.cpp\"";
                };
                932FC0DA0824A337005B3C75 /* ColorData.c */ = {
                        isa = PBXShellScriptBuildPhase;
                        isa = PBXSourcesBuildPhase;
                        buildActionMask = 2147483647;
                        files = (
-                               93F19A1308245E59001E9ABC /* KWQCharsets.cpp in Sources */,
                                93F19A1708245E59001E9ABC /* KWQComboBox.mm in Sources */,
                                93F19A1808245E59001E9ABC /* KWQCString.cpp in Sources */,
                                93F19A1E08245E59001E9ABC /* KWQFont.mm in Sources */,
                                93F19A4208245E59001E9ABC /* KWQPainter.mm in Sources */,
                                93F19A4A08245E59001E9ABC /* KWQRegExp.cpp in Sources */,
                                93F19A5108245E59001E9ABC /* KWQTextArea.mm in Sources */,
-                               93F19A5208245E59001E9ABC /* KWQTextCodec.cpp in Sources */,
                                93F19A5308245E59001E9ABC /* KWQTextEdit.mm in Sources */,
                                93F19A5408245E59001E9ABC /* KWQTextStream.cpp in Sources */,
                                93F19A5608245E59001E9ABC /* KWQValueListImpl.cpp in Sources */,
                                6593923A09AE435C002C531F /* KURLMac.mm in Sources */,
                                BC6D6DD209AF906600F59759 /* Font.cpp in Sources */,
                                9353676B09AED88B00D35CD6 /* ScrollViewMac.mm in Sources */,
+                               657BD74D09AFDC54005A2056 /* StreamingTextDecoder.cpp in Sources */,
+                               657BD74F09AFDC54005A2056 /* TextEncoding.cpp in Sources */,
+                               6576F9D609B2484A000041F7 /* TextEncodingMac.cpp in Sources */,
+                               65F5386B09B2C05E00F3DC4A /* CharsetNames.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 642afeb..0459552 100644 (file)
@@ -47,7 +47,7 @@
 #import "KWQPageState.h"
 #import "KWQRegExp.h"
 #import "KWQScrollBar.h"
-#import "KWQTextCodec.h"
+#import "TextEncoding.h"
 #import "KWQWindowWidget.h"
 #import "KeyEvent.h"
 #import "MouseEvent.h"
index af5a24b..e9b0344 100644 (file)
@@ -35,7 +35,7 @@
 #import "HTMLFormElementImpl.h"
 #import "HTMLInputElementImpl.h"
 #import "KWQAccObjectCache.h"
-#import "KWQCharsets.h"
+#import "CharsetNames.h"
 #import "KWQClipboard.h"
 #import "KWQEditCommand.h"
 #import "KWQFont.h"
@@ -43,7 +43,7 @@
 #import "KWQLoader.h"
 #import "KWQPageState.h"
 #import "KWQRenderTreeDebug.h"
-#import "KWQTextCodec.h"
+#import "TextEncoding.h"
 #import "KWQView.h"
 #import "MacFrame.h"
 #import "NodeImpl.h"
@@ -1389,8 +1389,9 @@ static HTMLFormElementImpl *formElementFromDOMElement(DOMElement *element)
     if (!doc) {
         return nil;
     }
+    // FIXME: is parseURL appropriate here?
     QString rel = parseURL(QString::fromNSString(string)).qstring();
-    return KURL(doc->baseURL(), rel, doc->decoder() ? doc->decoder()->codec() : 0).getNSURL();
+    return KURL(doc->completeURL(rel)).getNSURL();
 }
 
 - (BOOL)searchFor:(NSString *)string direction:(BOOL)forward caseSensitive:(BOOL)caseFlag wrap:(BOOL)wrapFlag
@@ -1442,7 +1443,7 @@ static HTMLFormElementImpl *formElementFromDOMElement(DOMElement *element)
 
 - (CFStringEncoding)textEncoding
 {
-    return KWQCFStringEncodingFromIANACharsetName(m_frame->encoding().latin1());
+    return WebCore::TextEncoding(m_frame->encoding().latin1()).encodingID();
 }
 
 - (NSView *)nextKeyView
@@ -1670,15 +1671,15 @@ static HTMLFormElementImpl *formElementFromDOMElement(DOMElement *element)
 
 + (NSString *)stringWithData:(NSData *)data textEncoding:(CFStringEncoding)textEncoding
 {
-    if (textEncoding == kCFStringEncodingInvalidId || textEncoding == kCFStringEncodingISOLatin1) {
+    if (textEncoding == kCFStringEncodingInvalidId)
         textEncoding = kCFStringEncodingWindowsLatin1;
-    }
-    return QTextCodec(textEncoding).toUnicode((const char*)[data bytes], [data length]).getNSString();
+
+    return WebCore::TextEncoding(textEncoding).toUnicode((const char*)[data bytes], [data length]).getNSString();
 }
 
 + (NSString *)stringWithData:(NSData *)data textEncodingName:(NSString *)textEncodingName
 {
-    CFStringEncoding textEncoding = KWQCFStringEncodingFromIANACharsetName([textEncodingName lossyCString]);
+    CFStringEncoding textEncoding = WebCore::TextEncoding([textEncodingName lossyCString]).encodingID();
     return [WebCoreFrameBridge stringWithData:data textEncoding:textEncoding];
 }
 
index f7c986f..fb923a7 100644 (file)
@@ -2497,7 +2497,9 @@ void DocumentImpl::setDecoder(Decoder *decoder)
 
 QString DocumentImpl::completeURL(const QString &URL)
 {
-    return KURL(baseURL(), URL, m_decoder ? m_decoder->codec() : 0).url();
+    if (!m_decoder)
+        return KURL(baseURL(), URL).url();
+    return KURL(baseURL(), URL, m_decoder->encoding()).url();
 }
 
 DOMString DocumentImpl::completeURL(const DOMString &URL)
index 7878b74..f4ed709 100644 (file)
@@ -894,7 +894,7 @@ JSValue *DOMDocument::getValueProperty(ExecState *exec, int token) const
   case ActualEncoding:
   case InputEncoding:
     if (Decoder* decoder = doc.decoder())
-      return jsString(decoder->encoding());
+      return jsString(decoder->encodingName());
     return jsNull();
   case DefaultCharset:
     if (Frame *frame = doc.frame())
@@ -936,7 +936,7 @@ void DOMDocument::putValueProperty(ExecState *exec, int token, JSValue *value, i
       doc.setSelectedStylesheetSet(value->toString(exec).domString());
       break;
     case Charset:
-      doc.decoder()->setEncoding(value->toString(exec).cstring().c_str(), Decoder::UserChosenEncoding);
+      doc.decoder()->setEncodingName(value->toString(exec).cstring().c_str(), Decoder::UserChosenEncoding);
       break;
   }
 }
index d1ec8a2..4468bc9 100644 (file)
 
 #include "config.h"
 #include "FormDataList.h"
-#include <qtextcodec.h>
+#include "TextEncoding.h"
 
 namespace DOM {
 
-FormDataList::FormDataList(QTextCodec *c)
-    : m_codec(c)
+FormDataList::FormDataList(const TextEncoding& c)
+    : m_encoding(c)
 {
 }
 
@@ -91,7 +91,7 @@ static QCString fixLineBreaks(const QCString &s)
 
 void FormDataList::appendString(const QString &s)
 {
-    QCString cstr = fixLineBreaks(m_codec->fromUnicode(s, true));
+    QCString cstr = fixLineBreaks(m_encoding.fromUnicode(s, true));
     cstr.truncate(cstr.length());
     m_list.append(cstr);
 }
index 8aaaea8..8d3ce82 100644 (file)
@@ -30,8 +30,7 @@
 #include <QString.h>
 #include <qvaluelist.h>
 #include "PlatformString.h"
-
-class QTextCodec;
+#include "TextEncoding.h"
 
 namespace DOM {
 
@@ -45,7 +44,7 @@ struct FormDataListItem {
 
 class FormDataList {
 public:
-    FormDataList(QTextCodec *);
+    FormDataList(const TextEncoding&);
 
     void appendData(const DOMString &key, const DOMString &value)
         { appendString(key.qstring()); appendString(value.qstring()); }
@@ -66,7 +65,7 @@ private:
     void appendString(const QCString &s);
     void appendString(const QString &s);
 
-    QTextCodec *m_codec;
+    TextEncoding m_encoding;
     QValueList<FormDataListItem> m_list;
 };
 
index afeb7af..f90350d 100644 (file)
@@ -37,7 +37,7 @@
 
 #include "Frame.h"
 
-#include <qtextcodec.h>
+#include "TextEncoding.h"
 #include "htmlnames.h"
 
 namespace WebCore {
@@ -57,7 +57,6 @@ HTMLFormElementImpl::HTMLFormElementImpl(DocumentImpl *doc)
     m_inreset = false;
     m_enctype = "application/x-www-form-urlencoded";
     m_boundary = "----------0xKhTmLbOuNdArY";
-    m_acceptcharset = "UNKNOWN";
     m_malformed = false;
 }
 
@@ -190,31 +189,26 @@ bool HTMLFormElementImpl::formData(FormData &form_data) const
 {
     QCString enc_string = ""; // used for non-multipart data
 
-    // find out the QTextcodec to use
     QString str = m_acceptcharset.qstring();
     str.replace(',', ' ');
     QStringList charsets = QStringList::split(' ', str);
-    QTextCodec* codec = 0;
+    TextEncoding encoding(InvalidEncoding);
     Frame *frame = getDocument()->frame();
     for (QStringList::Iterator it = charsets.begin(); it != charsets.end(); ++it) {
-        QString enc = (*it);
-        if (enc.contains("UNKNOWN")) {
-            // use standard document encoding
-            enc = "ISO-8859-1";
-            if (frame)
-                enc = frame->encoding();
-        }
-        if ((codec = QTextCodec::codecForName(enc.latin1())))
+        if ((encoding = TextEncoding((*it).latin1())).isValid())
             break;
     }
 
-    if (!codec)
-        codec = QTextCodec::codecForLocale();
-
+    if (!encoding.isValid()) {
+        if (frame)
+            encoding = TextEncoding(frame->encoding().latin1());
+        else
+            encoding = TextEncoding(Latin1Encoding);
+    }
 
     for (unsigned i = 0; i < formElements.size(); ++i) {
         HTMLGenericFormElementImpl* current = formElements[i];
-        FormDataList lst(codec);
+        FormDataList lst(encoding);
 
         if (!current->disabled() && current->appendFormData(lst, m_multipart)) {
             for (QValueListConstIterator<FormDataListItem> it = lst.begin(); it != lst.end(); ++it) {
@@ -256,7 +250,7 @@ bool HTMLFormElementImpl::formData(FormData &form_data) const
                         // things if the filename includes characters you can't encode
                         // in the website's character set.
                         hstr += "; filename=\"";
-                        hstr += codec->fromUnicode(path.mid(path.findRev('/') + 1), true);
+                        hstr += encoding.fromUnicode(path.mid(path.findRev('/') + 1), true);
                         hstr += "\"";
 
                         if(!static_cast<HTMLInputElementImpl*>(current)->value().isEmpty())
index 900afd3..1151e5d 100644 (file)
@@ -84,7 +84,7 @@ static const char titleEnd [] = "</title";
 // There may be better equivalents
 
 // We need this for entities at least. For non-entity text, we could
-// handle this in the text codec.
+// handle this in the text encoding.
 
 // To cover non-entity text, I think this function would need to be called
 // in more places. There seem to be some places that don't call fixUpChar.
index a1d4ad4..0b1d720 100644 (file)
     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     Boston, MA 02111-1307, USA.
 */
-//----------------------------------------------------------------------------
-//
-// KDE HTML Widget -- decoder for input stream
 
-//#define DECODE_DEBUG
 
 #include "config.h"
 #include "decoder.h"
@@ -33,7 +29,8 @@
 #include <klocale.h>
 #include <kxmlcore/Assertions.h>
 #include <qregexp.h>
-#include <qtextcodec.h>
+#include "TextEncoding.h"
+#include "StreamingTextDecoder.h"
 
 using namespace WebCore;
 using namespace HTMLNames;
@@ -105,11 +102,6 @@ const unsigned char KanjiCode::kanji_map_sjis[] =
  * Special Thanks to Kenichi Tsuchida
  */
 
-/*
- * Maybe we should use QTextCodec::heuristicContentMatch()
- * But it fails detection. It's not useful.
- */
-
 enum KanjiCode::Type KanjiCode::judge(const char *str, int size)
 {
     enum Type code;
@@ -261,57 +253,39 @@ breakBreak:
 }
 
 Decoder::Decoder() 
+  : m_encoding(Latin1Encoding)
+  , m_decoder(new StreamingTextDecoder(m_encoding))
+  , enc(0)
+  , m_type(DefaultEncoding)
+  , body(false)
+  , beginning(true)
 {
-    m_codec = QTextCodec::codecForName("iso8859-1"); // latin1
-    m_decoder = m_codec->makeDecoder();
-    enc = 0;
-    m_type = DefaultEncoding;
-    body = false;
-    beginning = true;
-    visualRTL = false;
 }
+
 Decoder::~Decoder()
 {
-    delete m_decoder;
 }
 
-void Decoder::setEncoding(const char *_encoding, EncodingType type)
+void Decoder::setEncodingName(const char* _encoding, EncodingSource type)
 {
-#ifdef DECODE_DEBUG
-    kdDebug(6005) << "setEncoding " << _encoding << " " << type << endl;
-#endif
     enc = _encoding;
-#ifdef DECODE_DEBUG
-    kdDebug(6005) << "old encoding is:" << m_codec->name() << endl;
-#endif
     enc = enc.lower();
-#ifdef DECODE_DEBUG
-    kdDebug(6005) << "requesting:" << enc << endl;
-#endif
-    if(enc.isNull() || enc.isEmpty())
+
+    if (enc.isEmpty())
         return;
 
-    QTextCodec *codec = (type == EncodingFromMetaTag || type == EncodingFromXMLHeader)
-        ? QTextCodec::codecForNameEightBitOnly(enc)
-        : QTextCodec::codecForName(enc);
-    if (codec) {
-        enc = codec->name();
-        visualRTL = codec->usesVisualOrdering();
-    }
+    TextEncoding encoding = TextEncoding(enc, type == EncodingFromMetaTag || type == EncodingFromXMLHeader);
 
-    if( codec ) { // in case the codec didn't exist, we keep the old one (fixes some sites specifying invalid codecs)
-        m_codec = codec;
+    // in case the encoding didn't exist, we keep the old one (fixes some sites specifying invalid encodings)
+    if (encoding.isValid()) {
+        enc = encoding.name();
+        m_encoding = encoding;
         m_type = type;
-        delete m_decoder;
-        m_decoder = m_codec->makeDecoder();
+        m_decoder.set(new StreamingTextDecoder(m_encoding));
     }
-    
-#ifdef DECODE_DEBUG
-    kdDebug(6005) << "Decoder::encoding used is " << m_codec->name() << endl;
-#endif
 }
 
-const char *Decoder::encoding() const
+const char* Decoder::encodingName() const
 {
     return enc;
 }
@@ -414,11 +388,10 @@ QString Decoder::decode(const char *data, int len)
             // If we found a BOM, use the encoding it implies.
             if (autoDetectedEncoding != 0) {
                 m_type = AutoDetectedEncoding;
-                m_codec = QTextCodec::codecForName(autoDetectedEncoding);
-                ASSERT(m_codec);
-                enc = m_codec->name();
-                delete m_decoder;
-                m_decoder = m_codec->makeDecoder();
+                m_encoding = TextEncoding(autoDetectedEncoding);
+                ASSERT(m_encoding.isValid());
+                enc = m_encoding.name();
+                m_decoder.set(new StreamingTextDecoder(m_encoding));
             }
         }
         beginning = false;
@@ -474,13 +447,13 @@ QString Decoder::decode(const char *data, int len)
                         int len;
                         int pos = findXMLEncoding(str, len);
                         if (pos != -1)
-                            setEncoding(str.mid(pos, len), EncodingFromXMLHeader);
+                            setEncodingName(str.mid(pos, len), EncodingFromXMLHeader);
                         if (m_type != EncodingFromXMLHeader)
-                            setEncoding("UTF-8", EncodingFromXMLHeader);
+                            setEncodingName("UTF-8", EncodingFromXMLHeader);
                         // continue looking for a charset - it may be specified in an HTTP-Equiv meta
                     } else if (ptr[0] == 0 && ptr[1] == '?' && ptr[2] == 0 && ptr[3] == 'x' && ptr[4] == 0 && ptr[5] == 'm' && ptr[6] == 0 && ptr[7] == 'l') {
                         // UTF-16 without BOM
-                        setEncoding(((ptr - buffer.latin1()) % 2) ? "UTF-16LE" : "UTF-16BE", AutoDetectedEncoding);
+                        setEncodingName(((ptr - buffer.latin1()) % 2) ? "UTF-16LE" : "UTF-16BE", AutoDetectedEncoding);
                         goto found;
                     }
 
@@ -528,10 +501,7 @@ QString Decoder::decode(const char *data, int len)
                                    (str[endpos] != ' ' && str[endpos] != '"' && str[endpos] != '\''
                                     && str[endpos] != ';' && str[endpos] != '>') )
                                 endpos++;
-#ifdef DECODE_DEBUG
-                            kdDebug( 6005 ) << "Decoder: found charset: " << str.mid(pos, endpos-pos) << endl;
-#endif
-                            setEncoding(str.mid(pos, endpos-pos), EncodingFromMetaTag);
+                            setEncodingName(str.mid(pos, endpos-pos), EncodingFromMetaTag);
                             if( m_type == EncodingFromMetaTag ) goto found;
 
                             if ( endpos >= str.length() || str[endpos] == '/' || str[endpos] == '>' ) break;
@@ -544,9 +514,6 @@ QString Decoder::decode(const char *data, int len)
                                (end || tag != htmlTag) && !withinTitle &&
                                (tag != headTag) && isalpha(tmp[0])) {
                         body = true;
-#ifdef DECODE_DEBUG
-                        kdDebug( 6005 ) << "Decoder: no charset found (bailing because of \"" << tag.qstring().ascii() << "\")." << endl;
-#endif
                         goto found;
                     }
                 }
@@ -559,13 +526,10 @@ QString Decoder::decode(const char *data, int len)
 
  found:
     // Do the auto-detect if our default encoding is one of the Japanese ones.
-    if (m_type != UserChosenEncoding && m_type != AutoDetectedEncoding && m_codec && m_codec->isJapanese())
+    if (m_type != UserChosenEncoding && m_type != AutoDetectedEncoding && m_encoding.isJapanese())
     {
-#ifdef DECODE_DEBUG
-        kdDebug( 6005 ) << "Decoder: use auto-detect (" << strlen(data) << ")" << endl;
-#endif
         const char *autoDetectedEncoding;
-        switch ( KanjiCode::judge( data, len ) ) {
+        switch (KanjiCode::judge(data, len)) {
         case KanjiCode::JIS:
             autoDetectedEncoding = "jis7";
             break;
@@ -579,28 +543,23 @@ QString Decoder::decode(const char *data, int len)
             autoDetectedEncoding = NULL;
             break;
         }
-#ifdef DECODE_DEBUG
-        kdDebug( 6005 ) << "Decoder: auto detect encoding is "
-            << (autoDetectedEncoding ? autoDetectedEncoding : "NULL") << endl;
-#endif
         if (autoDetectedEncoding != 0) {
-            setEncoding(autoDetectedEncoding, AutoDetectedEncoding);
+            setEncodingName(autoDetectedEncoding, AutoDetectedEncoding);
         }
     }
 
-    // if we still haven't found an encoding latin1 will be used...
-    // this is according to HTML4.0 specs
-    if (!m_codec)
+    // if we still haven't found an encoding, assume latin1
+    if (!m_encoding.isValid())
     {
-        if(enc.isEmpty()) enc = "iso8859-1";
-        m_codec = QTextCodec::codecForName(enc);
+        if (enc.isEmpty()) 
+            enc = "iso8859-1";
+        m_encoding = TextEncoding(enc);
         // be sure not to crash
-        if(!m_codec) {
+        if (!m_encoding.isValid()) {
             enc = "iso8859-1";
-            m_codec = QTextCodec::codecForName(enc);
+            m_encoding = TextEncoding(Latin1Encoding);
         }
-        delete m_decoder;
-        m_decoder = m_codec->makeDecoder();
+        m_decoder.set(new StreamingTextDecoder(m_encoding));
     }
     QString out;
 
index 64279aa..c4c0747 100644 (file)
 #ifndef KHTMLDECODER_H
 #define KHTMLDECODER_H
 
-#include <QString.h>
-class QTextCodec;
-class QTextDecoder;
+#include <kxmlcore/OwnPtr.h>
+#include "QString.h"
+#include "TextEncoding.h"
 
 namespace khtml {
+
+    class StreamingTextDecoder;    
+    
 /**
  * @internal
  */
 class Decoder : public Shared<Decoder>
 {
 public:
-    enum EncodingType {
+    enum EncodingSource {
         DefaultEncoding,
         AutoDetectedEncoding,
         EncodingFromXMLHeader,
@@ -45,23 +48,21 @@ public:
     Decoder();
     ~Decoder();
 
-    void setEncoding(const char *encoding, EncodingType type);
-    const char *encoding() const;
-
-    QString decode(const char *data, int len);
-
-    bool visuallyOrdered() const { return visualRTL; }
+    void setEncodingName(const char* encoding, EncodingSource type);
+    const char* encodingName() const;
 
-    const QTextCodec *codec() const { return m_codec; }
+    bool visuallyOrdered() const { return m_encoding.usesVisualOrdering(); }
+    const TextEncoding& encoding() const { return m_encoding; }
 
+    QString decode(const char* data, int len);
     QString flush() const;
 
 protected:
-    // codec used for decoding. default is Latin1.
-    QTextCodec *m_codec;
-    QTextDecoder *m_decoder;
+    // encoding used for decoding. default is Latin1.
+    TextEncoding m_encoding;
+    OwnPtr<StreamingTextDecoder> m_decoder;
     QCString enc;
-    EncodingType m_type;
+    EncodingSource m_type;
 
     // Our version of QString works well for all-8-bit characters, and allows null characters.
     // This works better than QCString when there are null characters involved.
@@ -69,7 +70,6 @@ protected:
 
     bool body;
     bool beginning;
-    bool visualRTL;
 };
 
 }
index f3403d2..80c0c94 100644 (file)
@@ -219,7 +219,7 @@ RefPtr<DocumentImpl> XSLTProcessorImpl::createDocumentFromSource(const QString &
     result->determineParseMode(documentSource); // Make sure we parse in the correct mode.
     
     RefPtr<Decoder> decoder = new Decoder;
-    decoder->setEncoding(sourceEncoding.isEmpty() ? "UTF-8" : sourceEncoding.latin1(), Decoder::EncodingFromXMLHeader);
+    decoder->setEncodingName(sourceEncoding.isEmpty() ? "UTF-8" : sourceEncoding.latin1(), Decoder::EncodingFromXMLHeader);
     result->setDecoder(decoder.get());
     
     result->write(documentSource);
diff --git a/WebCore/kwq/KWQCharsets.cpp b/WebCore/kwq/KWQCharsets.cpp
deleted file mode 100644 (file)
index cd497b9..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (C) 2003 Apple Computer, Inc.  All rights reserved.
- * Copyright (C) 2005 Alexey Proskuryakov <ap@nypop.com>.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */
-
-#include "config.h"
-#include "KWQCharsets.h"
-
-#include <unicode/ucnv.h>
-#include <unicode/utypes.h>
-
-struct CharsetEntry {
-    const char *name;
-    CFStringEncoding encoding;
-    KWQEncodingFlags flags;
-};
-
-// Until there's a CFString constant for these encodings, this works.
-// Since they are macros, they won't cause a compile failure even if the CFString constant is added.
-#define kCFStringEncodingBig5_DOSVariant (kTextEncodingBig5 | (kBig5_DOSVariant << 16))
-#define kCFStringEncodingEUC_CN_DOSVariant (kTextEncodingEUC_CN | (kEUC_CN_DOSVariant << 16))
-#define kCFStringEncodingEUC_KR_DOSVariant (kTextEncodingEUC_KR | (kEUC_KR_DOSVariant << 16))
-#define kCFStringEncodingISOLatin10 kTextEncodingISOLatin10
-#define kCFStringEncodingKOI8_U kTextEncodingKOI8_U
-#define kCFStringEncodingShiftJIS_DOSVariant (kTextEncodingShiftJIS | (kShiftJIS_DOSVariant << 16))
-
-// The following autogenerated file includes the charset data.
-#include "KWQCharsetData.c"
-
-static Boolean encodingNamesEqual(const void *value1, const void *value2);
-static CFHashCode encodingNameHash(const void *value);
-
-static const CFDictionaryKeyCallBacks encodingNameKeyCallbacks = { 0, NULL, NULL, NULL, encodingNamesEqual, encodingNameHash };
-
-static CFMutableDictionaryRef nameToTable = NULL;
-static CFMutableDictionaryRef encodingToTable = NULL;
-
-static void buildDictionaries()
-{
-    nameToTable = CFDictionaryCreateMutable(NULL, 0, &encodingNameKeyCallbacks, NULL);
-    encodingToTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
-
-    for (int i = 0; table[i].name != NULL; i++) {
-        CFDictionaryAddValue(nameToTable, table[i].name, &table[i]);
-        CFDictionaryAddValue(encodingToTable, reinterpret_cast<void *>(table[i].encoding), &table[i]);
-    }
-}
-
-CFStringEncoding KWQCFStringEncodingFromIANACharsetName(const char *name, KWQEncodingFlags *flags)
-{
-    if (nameToTable == NULL) {
-        buildDictionaries();
-    }
-
-    const void *value;
-    if (!CFDictionaryGetValueIfPresent(nameToTable, name, &value)) {
-        UErrorCode err = U_ZERO_ERROR;
-        name = ucnv_getStandardName(name, "IANA", &err);
-        
-        if (!name || !CFDictionaryGetValueIfPresent(nameToTable, name, &value)) {
-            if (flags) {
-                *flags = NoEncodingFlags;
-            }
-            return kCFStringEncodingInvalidId;
-        }
-    }
-    if (flags) {
-        *flags = static_cast<const CharsetEntry *>(value)->flags;
-    }
-    return static_cast<const CharsetEntry *>(value)->encoding;
-}
-
-const char *KWQCFStringEncodingToIANACharsetName(CFStringEncoding encoding)
-{
-    if (encodingToTable == NULL) {
-        buildDictionaries();
-    }
-    
-    const void *value;
-    if (!CFDictionaryGetValueIfPresent(encodingToTable, reinterpret_cast<void *>(encoding), &value)) {
-        return NULL;
-    }
-    return static_cast<const CharsetEntry *>(value)->name;
-}
-
-static Boolean encodingNamesEqual(const void *value1, const void *value2)
-{
-    const char *s1 = static_cast<const char *>(value1);
-    const char *s2 = static_cast<const char *>(value2);
-    
-    while (1) {
-        char c1;
-        do {
-            c1 = *s1++;
-        } while (c1 && !isalnum(c1));
-        char c2;
-        do {
-            c2 = *s2++;
-        } while (c2 && !isalnum(c2));
-        
-        if (tolower(c1) != tolower(c2)) {
-            return false;
-        }
-        
-        if (!c1 || !c2) {
-            return !c1 && !c2;
-        }
-    }
-}
-
-// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
-// or anything like that.
-const unsigned PHI = 0x9e3779b9U;
-
-// This hash algorithm comes from:
-// http://burtleburtle.net/bob/hash/hashfaq.html
-// http://burtleburtle.net/bob/hash/doobs.html
-static CFHashCode encodingNameHash(const void *value)
-{
-    const char *s = static_cast<const char *>(value);
-    
-    CFHashCode h = PHI;
-
-    for (int i = 0; i != 16; ++i) {
-        char c;
-        do {
-            c = *s++;
-        } while (c && !isalnum(c));
-        if (!c) {
-            break;
-        }
-        h += tolower(c);
-        h += (h << 10); 
-        h ^= (h >> 6); 
-    }
-
-    h += (h << 3);
-    h ^= (h >> 11);
-    h += (h << 15);
-    return h;
-}
diff --git a/WebCore/kwq/KWQTextCodec.cpp b/WebCore/kwq/KWQTextCodec.cpp
deleted file mode 100644 (file)
index 87c9149..0000000
+++ /dev/null
@@ -1,556 +0,0 @@
-/*
- * Copyright (C) 2004 Apple Computer, Inc.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */
-
-#include "config.h"
-#include "KWQTextCodec.h"
-
-#include <kxmlcore/Assertions.h>
-#include "KWQCharsets.h"
-#include "KWQLogging.h"
-#include <unicode/ucnv.h>
-#include <unicode/utypes.h>
-
-const UniChar replacementCharacter = 0xFFFD;
-const UniChar BOM = 0xFEFF;
-
-static const int ConversionBufferSize = 16384;
-
-class KWQTextDecoder : public QTextDecoder {
-public:
-    KWQTextDecoder(CFStringEncoding, KWQEncodingFlags);
-    ~KWQTextDecoder();
-    
-    QString toUnicode(const char *chs, int len, bool flush);
-
-private:
-    QString convert(const char *chs, int len, bool flush)
-        { return convert(reinterpret_cast<const unsigned char *>(chs), len, flush); }
-    QString convert(const unsigned char *chs, int len, bool flush);
-    QString convertLatin1(const unsigned char *chs, int len);
-    QString convertUTF16(const unsigned char *chs, int len);
-    
-    // ICU decoding.
-    QString convertUsingICU(const unsigned char *chs, int len, bool flush);
-    UErrorCode createICUConverter();
-
-    static void appendOmittingUnwanted(QString &s, const UniChar *characters, int byteCount);
-    
-    KWQTextDecoder(const KWQTextDecoder &);
-    KWQTextDecoder &operator=(const KWQTextDecoder &);
-
-    CFStringEncoding _encoding;
-    bool _littleEndian;
-    bool _atStart;
-    bool _error;
-
-    unsigned _numBufferedBytes;
-    unsigned char _bufferedBytes[16]; // bigger than any single multi-byte character
-
-    // ICU decoding.
-    UConverter *_converterICU;
-    static UConverter *_cachedConverterICU;
-    static CFStringEncoding _cachedConverterEncoding;
-};
-
-UConverter *KWQTextDecoder::_cachedConverterICU;
-CFStringEncoding KWQTextDecoder::_cachedConverterEncoding = kCFStringEncodingInvalidId;
-
-static Boolean QTextCodecsEqual(const void *value1, const void *value2);
-static CFHashCode QTextCodecHash(const void *value);
-
-static QTextCodec *codecForCFStringEncoding(CFStringEncoding encoding, KWQEncodingFlags flags)
-{
-    if (encoding == kCFStringEncodingInvalidId) {
-        return 0;
-    }
-    
-    static const CFDictionaryKeyCallBacks QTextCodecKeyCallbacks = { 0, NULL, NULL, NULL, QTextCodecsEqual, QTextCodecHash };
-    static CFMutableDictionaryRef encodingToCodec = CFDictionaryCreateMutable(NULL, 0, &QTextCodecKeyCallbacks, NULL);
-    
-    QTextCodec key(encoding, flags);
-    const void *value;
-    if (CFDictionaryGetValueIfPresent(encodingToCodec, &key, &value)) {
-        return const_cast<QTextCodec *>(static_cast<const QTextCodec *>(value));
-    }
-    QTextCodec *codec = new QTextCodec(encoding, flags);
-    CFDictionarySetValue(encodingToCodec, codec, codec);
-    return codec;
-}
-
-QTextCodec* QTextCodec::codecForName(const char* name)
-{
-    KWQEncodingFlags flags;
-    CFStringEncoding encoding = KWQCFStringEncodingFromIANACharsetName(name, &flags);
-    return codecForCFStringEncoding(encoding, flags);
-}
-
-QTextCodec* QTextCodec::utf8Codec()
-{
-    return codecForCFStringEncoding(kCFStringEncodingUTF8, NoEncodingFlags);
-}
-
-QTextCodec* QTextCodec::codecForNameEightBitOnly(const char* name)
-{
-    KWQEncodingFlags flags;
-    CFStringEncoding encoding = KWQCFStringEncodingFromIANACharsetName(name, &flags);
-    switch (encoding) {
-        case kCFStringEncodingUnicode:
-            encoding = kCFStringEncodingUTF8;
-            break;
-    }
-    return codecForCFStringEncoding(encoding, flags);
-}
-
-QTextCodec* QTextCodec::codecForLocale()
-{
-    return codecForCFStringEncoding(CFStringGetSystemEncoding(), NoEncodingFlags);
-}
-
-const char* QTextCodec::name() const
-{
-    return KWQCFStringEncodingToIANACharsetName(_encoding);
-}
-
-QTextDecoder* QTextCodec::makeDecoder() const
-{
-    return new KWQTextDecoder(_encoding, _flags);
-}
-
-inline CFStringEncoding effectiveEncoding(CFStringEncoding e)
-{
-    switch (e) {
-        case kCFStringEncodingISOLatin1:
-        case kCFStringEncodingASCII:
-            e = kCFStringEncodingWindowsLatin1;
-            break;
-    }
-    return e;
-}
-
-QCString QTextCodec::fromUnicode(const QString &qcs, bool allowEntities) const
-{
-    // FIXME: We should really use the same API in both directions.
-    // Currently we use ICU to decode and CFString to encode; it would be better to encode with ICU too.
-    
-    CFStringEncoding encoding = effectiveEncoding(_encoding);
-
-    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
-    // Encoding will change the yen sign back into a backslash.
-    QString copy = qcs;
-    copy.replace(QChar('\\'), backslashAsCurrencySymbol());
-    CFStringRef cfs = copy.getCFString();
-    
-    CFIndex startPos = 0;
-    CFIndex charactersLeft = CFStringGetLength(cfs);
-    QCString result(1); // for trailing zero
-
-    while (charactersLeft > 0) {
-        CFRange range = CFRangeMake(startPos, charactersLeft);
-        CFIndex bufferLength;
-        CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, NULL, 0x7FFFFFFF, &bufferLength);
-        
-        QCString chunk(bufferLength + 1);
-        CFIndex charactersConverted = CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, reinterpret_cast<unsigned char *>(chunk.data()), bufferLength, &bufferLength);
-        chunk[bufferLength] = 0;
-        result.append(chunk);
-        
-        if (charactersConverted != charactersLeft) {
-            // FIXME: support surrogate pairs
-            UniChar badChar = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted);
-            char buf[16];
-            sprintf(buf, "&#%u;", badChar);
-            result.append(buf);
-            
-            ++charactersConverted;
-        }
-        
-        startPos += charactersConverted;
-        charactersLeft -= charactersConverted;
-    }
-    return result;
-}
-
-QString QTextCodec::toUnicode(const char *chs, int len) const
-{
-    return KWQTextDecoder(_encoding, _flags).toUnicode(chs, len, true);
-}
-
-QString QTextCodec::toUnicode(const ByteArray &qba, int len) const
-{
-    return KWQTextDecoder(_encoding, _flags).toUnicode(qba, len, true);
-}
-
-QChar QTextCodec::backslashAsCurrencySymbol() const
-{
-    // FIXME: We should put this information into KWQCharsetData instead of having a switch here.
-    switch (_encoding) {
-        case kCFStringEncodingShiftJIS_X0213_00:
-        case kCFStringEncodingEUC_JP:
-            return 0x00A5; // yen sign
-        default:
-            return '\\';
-    }
-}
-
-bool operator==(const QTextCodec &a, const QTextCodec &b)
-{
-    return a._encoding == b._encoding && a._flags == b._flags;
-}
-
-// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
-// or anything like that.
-const unsigned PHI = 0x9e3779b9U;
-
-// Paul Hsieh's SuperFastHash
-// http://www.azillionmonkeys.com/qed/hash.html
-// Adapted assuming _encoding is 32 bits and _flags is at most 16 bits
-unsigned QTextCodec::hash() const
-{
-    uint32_t hash = PHI;
-    uint32_t tmp;
-    
-    hash += _encoding & 0xffff;
-    tmp = ((_encoding >> 16) << 11) ^ hash;
-    hash = (hash << 16) ^ tmp;
-    hash += hash >> 11;
-    
-    hash += _flags & 0xffff;
-    hash ^= hash << 11;
-    hash += hash >> 17;
-
-    // Force "avalanching" of final 127 bits
-    hash ^= hash << 3;
-    hash += hash >> 5;
-    hash ^= hash << 2;
-    hash += hash >> 15;
-    hash ^= hash << 10;
-
-    return hash;
-}
-
-static Boolean QTextCodecsEqual(const void *a, const void *b)
-{
-    return *static_cast<const QTextCodec *>(a) == *static_cast<const QTextCodec *>(b);
-}
-
-static CFHashCode QTextCodecHash(const void *value)
-{
-    return static_cast<const QTextCodec *>(value)->hash();
-}
-
-// ================
-
-QTextDecoder::~QTextDecoder()
-{
-}
-
-// ================
-
-KWQTextDecoder::KWQTextDecoder(CFStringEncoding e, KWQEncodingFlags f)
-    : _encoding(e), _littleEndian(f & ::LittleEndian), _atStart(true), _error(false)
-    , _numBufferedBytes(0), _converterICU(0)
-{
-}
-
-KWQTextDecoder::~KWQTextDecoder()
-{
-    if (_converterICU) {
-        if (_cachedConverterICU != 0) {
-            ucnv_close(_cachedConverterICU);
-        }
-        _cachedConverterICU = _converterICU;
-        _cachedConverterEncoding = _encoding;
-    }
-}
-
-QString KWQTextDecoder::convertLatin1(const unsigned char *s, int length)
-{
-    ASSERT(_numBufferedBytes == 0);
-
-    return QString(reinterpret_cast<const char *>(s), length);
-}
-
-QString KWQTextDecoder::convertUTF16(const unsigned char *s, int length)
-{
-    ASSERT(_numBufferedBytes == 0 || _numBufferedBytes == 1);
-
-    const unsigned char *p = s;
-    unsigned len = length;
-    
-    QString result("");
-    
-    result.reserve(length / 2);
-
-    if (_numBufferedBytes != 0 && len != 0) {
-        ASSERT(_numBufferedBytes == 1);
-        UniChar c;
-        if (_littleEndian) {
-            c = _bufferedBytes[0] | (p[0] << 8);
-        } else {
-            c = (_bufferedBytes[0] << 8) | p[0];
-        }
-        if (c) {
-            result.append(reinterpret_cast<QChar *>(&c), 1);
-        }
-        _numBufferedBytes = 0;
-        p += 1;
-        len -= 1;
-    }
-    
-    while (len > 1) {
-        UniChar buffer[ConversionBufferSize];
-        int runLength = MIN(len / 2, sizeof(buffer) / sizeof(buffer[0]));
-        int bufferLength = 0;
-        if (_littleEndian) {
-            for (int i = 0; i < runLength; ++i) {
-                UniChar c = p[0] | (p[1] << 8);
-                p += 2;
-                if (c && c != BOM) {
-                    buffer[bufferLength++] = c;
-                }
-            }
-        } else {
-            for (int i = 0; i < runLength; ++i) {
-                UniChar c = (p[0] << 8) | p[1];
-                p += 2;
-                if (c && c != BOM) {
-                    buffer[bufferLength++] = c;
-                }
-            }
-        }
-        result.append(reinterpret_cast<QChar *>(buffer), bufferLength);
-        len -= runLength * 2;
-    }
-    
-    if (len) {
-        ASSERT(_numBufferedBytes == 0);
-        _numBufferedBytes = 1;
-        _bufferedBytes[0] = p[0];
-    }
-    
-    return result;
-}
-
-UErrorCode KWQTextDecoder::createICUConverter()
-{
-    const CFStringEncoding encoding = effectiveEncoding(_encoding);
-    const char *encodingName = KWQCFStringEncodingToIANACharsetName(encoding);
-
-    bool cachedEncodingEqual = _cachedConverterEncoding == encoding;
-    _cachedConverterEncoding = kCFStringEncodingInvalidId;
-
-    if (cachedEncodingEqual && _cachedConverterICU) {
-        _converterICU = _cachedConverterICU;
-        _cachedConverterICU = 0;
-        LOG(TextConversion, "using cached ICU converter for encoding: %s", encodingName);
-    } else {    
-        UErrorCode err = U_ZERO_ERROR;
-        ASSERT(!_converterICU);
-        LOG(TextConversion, "creating ICU converter for encoding: %s", encodingName);
-        _converterICU = ucnv_open(encodingName, &err);
-        if (err == U_AMBIGUOUS_ALIAS_WARNING) {
-            LOG_ERROR("ICU ambiguous alias warning for encoding: %s", encodingName);
-        }
-        if (!_converterICU) {
-            LOG_ERROR("the ICU Converter won't convert from text encoding 0x%X, error %d", encoding, err);
-            return err;
-        }
-    }
-    
-    return U_ZERO_ERROR;
-}
-
-// We strip replacement characters because the ICU converter for UTF-8 converts
-// invalid sequences into replacement characters, but other browsers discard them.
-// We strip BOM characters because they can show up both at the start of content
-// and inside content, and we never want them to end up in the decoded text.
-static inline bool unwanted(UniChar c)
-{
-    switch (c) {
-        case replacementCharacter:
-        case BOM:
-            return true;
-        default:
-            return false;
-    }
-}
-
-void KWQTextDecoder::appendOmittingUnwanted(QString &s, const UniChar *characters, int byteCount)
-{
-    ASSERT(byteCount % sizeof(UniChar) == 0);
-    int start = 0;
-    int characterCount = byteCount / sizeof(UniChar);
-    for (int i = 0; i != characterCount; ++i) {
-        if (unwanted(characters[i])) {
-            if (start != i) {
-                s.append(reinterpret_cast<const QChar *>(&characters[start]), i - start);
-            }
-            start = i + 1;
-        }
-    }
-    if (start != characterCount) {
-        s.append(reinterpret_cast<const QChar *>(&characters[start]), characterCount - start);
-    }
-}
-
-QString KWQTextDecoder::convertUsingICU(const unsigned char *chs, int len, bool flush)
-{
-    // Get a converter for the passed-in encoding.
-    if (!_converterICU && U_FAILURE(createICUConverter())) {
-        return QString();
-    }
-    ASSERT(_converterICU);
-
-    QString result("");
-    result.reserve(len);
-
-    UChar buffer[ConversionBufferSize];
-    const char *source = reinterpret_cast<const char *>(chs);
-    const char *sourceLimit = source + len;
-    int32_t *offsets = NULL;
-    UErrorCode err;
-    
-    do {
-        UChar *target = buffer;
-        const UChar *targetLimit = target + ConversionBufferSize;
-        err = U_ZERO_ERROR;
-        ucnv_toUnicode(_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
-        int count = target - buffer;
-        appendOmittingUnwanted(result, reinterpret_cast<const UniChar *>(buffer), count * sizeof(UniChar));
-    } while (err == U_BUFFER_OVERFLOW_ERROR);
-
-    if (U_FAILURE(err)) {
-        // flush the converter so it can be reused, and not be bothered by this error.
-        do {
-            UChar *target = buffer;
-            const UChar *targetLimit = target + ConversionBufferSize;
-            err = U_ZERO_ERROR;
-            ucnv_toUnicode(_converterICU, &target, targetLimit, &source, sourceLimit, offsets, true, &err);
-        } while (source < sourceLimit);
-        LOG_ERROR("ICU conversion error");
-        return QString();
-    }
-    
-    return result;
-}
-
-QString KWQTextDecoder::convert(const unsigned char *chs, int len, bool flush)
-{
-    //#define PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE 1000
-
-    switch (_encoding) {
-    case kCFStringEncodingISOLatin1:
-    case kCFStringEncodingWindowsLatin1:
-        return convertLatin1(chs, len);
-
-    case kCFStringEncodingUnicode:
-        return convertUTF16(chs, len);
-
-    default:
-#if PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE
-        QString result;
-        int chunkSize;
-        for (int i = 0; i != len; i += chunkSize) {
-            chunkSize = len - i;
-            if (chunkSize > PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE) {
-                chunkSize = PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE;
-            }
-            result += convertUsingICU(chs + i, chunkSize, flush && (i + chunkSize == len));
-        }
-        return result;
-#else
-        return convertUsingICU(chs, len, flush);
-#endif
-    }
-    ASSERT_NOT_REACHED();
-    return QString();
-}
-
-QString KWQTextDecoder::toUnicode(const char *chs, int len, bool flush)
-{
-    ASSERT_ARG(len, len >= 0);
-    
-    if (_error || !chs) {
-        return QString();
-    }
-    if (len <= 0 && !flush) {
-        return "";
-    }
-
-    // Handle normal case.
-    if (!_atStart) {
-        return convert(chs, len, flush);
-    }
-
-    // Check to see if we found a BOM.
-    int numBufferedBytes = _numBufferedBytes;
-    int buf1Len = numBufferedBytes;
-    int buf2Len = len;
-    const unsigned char *buf1 = _bufferedBytes;
-    const unsigned char *buf2 = reinterpret_cast<const unsigned char *>(chs);
-    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
-    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
-    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
-    int BOMLength = 0;
-    if (c1 == 0xFF && c2 == 0xFE) {
-        _encoding = kCFStringEncodingUnicode;
-        _littleEndian = true;
-        BOMLength = 2;
-    } else if (c1 == 0xFE && c2 == 0xFF) {
-        _encoding = kCFStringEncodingUnicode;
-        _littleEndian = false;
-        BOMLength = 2;
-    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
-        _encoding = kCFStringEncodingUTF8;
-        BOMLength = 3;
-    }
-
-    // Handle case where we found a BOM.
-    if (BOMLength != 0) {
-        ASSERT(numBufferedBytes + len >= BOMLength);
-        int skip = BOMLength - numBufferedBytes;
-        _numBufferedBytes = 0;
-        _atStart = false;
-        return len == skip ? QString("") : convert(chs + skip, len - skip, flush);
-    }
-
-    // Handle case where we know there is no BOM coming.
-    const int bufferSize = sizeof(_bufferedBytes);
-    if (numBufferedBytes + len > bufferSize || flush) {
-        _atStart = false;
-        if (numBufferedBytes == 0) {
-            return convert(chs, len, flush);
-        }
-        unsigned char bufferedBytes[sizeof(_bufferedBytes)];
-        memcpy(bufferedBytes, _bufferedBytes, numBufferedBytes);
-        _numBufferedBytes = 0;
-        return convert(bufferedBytes, numBufferedBytes, false) + convert(chs, len, flush);
-    }
-
-    // Continue to look for the BOM.
-    memcpy(&_bufferedBytes[numBufferedBytes], chs, len);
-    _numBufferedBytes += len;
-    return "";
-}
diff --git a/WebCore/kwq/KWQTextCodec.h b/WebCore/kwq/KWQTextCodec.h
deleted file mode 100644 (file)
index 275838e..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2004 Apple Computer, Inc.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */
-
-#ifndef QTEXTCODEC_H_
-#define QTEXTCODEC_H_
-
-#include "KWQCharsets.h"
-#include "QString.h"
-#include "KWQCString.h"
-
-class QTextDecoder;
-
-class QTextCodec {
-public:
-    static QTextCodec* codecForName(const char *);
-    static QTextCodec* codecForNameEightBitOnly(const char *);
-    static QTextCodec* codecForLocale();
-    static QTextCodec* utf8Codec();
-
-#if __APPLE__
-    explicit QTextCodec(CFStringEncoding e, KWQEncodingFlags f = NoEncodingFlags) : _encoding(e), _flags(f) { }
-#endif
-
-    const char *name() const;
-    bool usesVisualOrdering() const { return _flags & VisualOrdering; }
-    bool isJapanese() const { return _flags & IsJapanese; }
-    
-    QChar backslashAsCurrencySymbol() const;
-
-    QTextDecoder *makeDecoder() const;
-
-    QCString fromUnicode(const QString &str, bool allowEntities = false) const;
-
-    QString toUnicode(const char *, int) const;
-    QString toUnicode(const ByteArray &, int) const;
-    
-    friend bool operator==(const QTextCodec &, const QTextCodec &);
-    unsigned hash() const;
-    
-private:
-#if __APPLE__
-    CFStringEncoding _encoding;
-#endif
-    KWQEncodingFlags _flags;
-};
-
-inline bool operator!=(const QTextCodec &a, const QTextCodec &b) { return !(a == b); }
-
-class QTextDecoder {
-public:
-    virtual ~QTextDecoder();
-    virtual QString toUnicode(const char *, int, bool flush = false) = 0;
-};
-
-#endif
index 0ab7f6e..c9d8338 100644 (file)
 #import <Foundation/Foundation.h>
 
 #ifdef __cplusplus
-    class QTextDecoder;
+namespace WebCore {
+    class StreamingTextDecoder;
+}
+typedef WebCore::StreamingTextDecoder PlatformDecoder;
 #else
-    @class QTextDecoder;
+    @class PlatformDecoder;
 #endif
 
 @interface WebCoreTextDecoder : NSObject
 {
 @private
-    QTextDecoder *_decoder;
+    PlatformDecoder *_decoder;
 }
 
 - (WebCoreTextDecoder *)initWithEncodingName:(NSString *)encodingName;
index f9ab971..b8b7aa0 100644 (file)
 #import "config.h"
 #import "WebCoreTextDecoder.h"
 
-#import "KWQTextCodec.h"
+#import "TextEncoding.h"
+#import "StreamingTextDecoder.h"
+
+using namespace WebCore;
 
 @implementation WebCoreTextDecoder
 
 {
     self = [super init];
     
-    QTextCodec *codec = QTextCodec::codecForName([encodingName cStringUsingEncoding:NSASCIIStringEncoding]);
-    if (!codec)
-        codec = QTextCodec::codecForName("ISO-8859-1");
+    WebCore::TextEncoding encoding = WebCore::TextEncoding([encodingName cStringUsingEncoding:NSASCIIStringEncoding]);
+    if (!encoding.isValid())
+        encoding = WebCore::TextEncoding(Latin1Encoding);
     
-    _decoder = codec->makeDecoder();
+    _decoder = new StreamingTextDecoder(encoding);
     
     return self;
 }
index c648b2c..fd9e64a 100644 (file)
 #include "CachedObjectClientWalker.h"
 #include "KWQLoader.h"
 #include "loader.h"
-#include <qtextcodec.h>
+#include "TextEncoding.h"
 
 namespace WebCore {
 
 CachedCSSStyleSheet::CachedCSSStyleSheet(DocLoader* dl, const DOMString &url, KIO::CacheControl _cachePolicy, time_t _expireDate, const QString& charset)
-    : CachedObject(url, CSSStyleSheet, _cachePolicy, _expireDate), m_codec(0)
+    : CachedObject(url, CSSStyleSheet, _cachePolicy, _expireDate)
+    , m_encoding(charset.latin1())
 {
     // It's css we want.
     setAccept("text/css");
     // load the file
     Cache::loader()->load(dl, this, false);
     m_loading = true;
-    if (!charset.isEmpty())
-        m_codec = QTextCodec::codecForName(charset.latin1());
-    if (!m_codec)
-        m_codec = QTextCodec::codecForName("iso8859-1");
+    if (!m_encoding.isValid())
+        m_encoding = TextEncoding(Latin1Encoding);
 }
 
 CachedCSSStyleSheet::CachedCSSStyleSheet(const DOMString &url, const QString &stylesheet_data)
     : CachedObject(url, CSSStyleSheet, KIO::CC_Verify, 0, stylesheet_data.length())
+    , m_encoding(InvalidEncoding)
 {
     m_loading = false;
     m_status = Persistent;
-    m_codec = 0;
     m_sheet = DOMString(stylesheet_data);
 }
 
@@ -79,12 +78,12 @@ void CachedCSSStyleSheet::deref(CachedObjectClient *c)
       delete this;
 }
 
-void CachedCSSStyleSheet::setCharset( const QString &chs )
+void CachedCSSStyleSheet::setCharset(const QString& chs)
 {
     if (!chs.isEmpty()) {
-        QTextCodec *codec = QTextCodec::codecForName(chs.latin1());
-        if (codec)
-            m_codec = codec;
+        TextEncoding encoding = TextEncoding(chs.latin1());
+        if (encoding.isValid())
+            m_encoding = encoding;
     }
 }
 
@@ -94,7 +93,7 @@ void CachedCSSStyleSheet::data(ByteArray& data, bool eof )
         return;
 
     setSize(data.size());
-    m_sheet = DOMString(m_codec->toUnicode(data.data(), size()));
+    m_sheet = DOMString(m_encoding.toUnicode(data.data(), size()));
     m_loading = false;
 
     checkNotify();
index 785fa76..e89dc6a 100644 (file)
@@ -28,8 +28,7 @@
 #define KHTML_CachedCSSStyleSheet_h
 
 #include "CachedObject.h"
-
-class QTextCodec;
+#include "TextEncoding.h"
 
 namespace khtml
 {
@@ -57,7 +56,7 @@ namespace khtml
 
     protected:
         DOMString m_sheet;
-        QTextCodec* m_codec;
+        TextEncoding m_encoding;
     };
 
 }
index 664fb69..49f3250 100644 (file)
 #include "CachedObjectClient.h"
 #include "CachedObjectClientWalker.h"
 #include "loader.h"
-#include <qtextcodec.h>
+#include "TextEncoding.h"
 
 namespace WebCore {
 
 CachedScript::CachedScript(DocLoader* dl, const DOMString &url, KIO::CacheControl _cachePolicy, time_t _expireDate, const QString& charset)
-    : CachedObject(url, Script, _cachePolicy, _expireDate), m_codec(0)
+    : CachedObject(url, Script, _cachePolicy, _expireDate)
+    , m_encoding(charset.latin1())
 {
     // It's javascript we want.
     // But some websites think their scripts are <some wrong mimetype here>
@@ -47,19 +48,17 @@ CachedScript::CachedScript(DocLoader* dl, const DOMString &url, KIO::CacheContro
     // load the file
     Cache::loader()->load(dl, this, false);
     m_loading = true;
-    if (!charset.isEmpty())
-        m_codec = QTextCodec::codecForName(charset.latin1());
-    if (!m_codec)
-        m_codec = QTextCodec::codecForName("iso8859-1");
+    if (!m_encoding.isValid())
+        m_encoding = TextEncoding(Latin1Encoding);
 }
 
 CachedScript::CachedScript(const DOMString &url, const QString &script_data)
     : CachedObject(url, Script, KIO::CC_Verify, 0, script_data.length())
+    , m_encoding(InvalidEncoding)
 {
     m_errorOccurred = false;
     m_loading = false;
     m_status = Persistent;
-    m_codec = 0;
     m_script = DOMString(script_data);
 }
 
@@ -82,13 +81,11 @@ void CachedScript::deref(CachedObjectClient *c)
       delete this;
 }
 
-void CachedScript::setCharset( const QString &chs )
+void CachedScript::setCharset(const QString &chs)
 {
-    if (!chs.isEmpty()) {
-        QTextCodec *codec = QTextCodec::codecForName(chs.latin1());
-        if (codec)
-            m_codec = codec;
-    }
+    TextEncoding encoding = TextEncoding(chs.latin1());
+    if (encoding.isValid())
+        m_encoding = encoding;
 }
 
 void CachedScript::data(ByteArray& data, bool eof )
@@ -96,7 +93,7 @@ void CachedScript::data(ByteArray& data, bool eof )
     if (!eof)
         return;
     setSize(data.size());
-    m_script = String(m_codec->toUnicode(data.data(), size()));
+    m_script = String(m_encoding.toUnicode(data.data(), size()));
     m_loading = false;
     checkNotify();
 }
index 149a358..c253b80 100644 (file)
@@ -28,8 +28,7 @@
 #define KHTML_CachedScript_h
 
 #include "CachedObject.h"
-
-class QTextCodec;
+#include "TextEncoding.h"
 
 namespace WebCore {
     class DocLoader;
@@ -57,7 +56,7 @@ namespace WebCore {
 
     private:
        String m_script;
-        QTextCodec* m_codec;
+        TextEncoding m_encoding;
         bool m_errorOccurred;
     };
 }
index f3c5ae5..3ac7e6c 100644 (file)
@@ -71,7 +71,7 @@ void CachedXSLStyleSheet::deref(CachedObjectClient *c)
 void CachedXSLStyleSheet::setCharset( const QString &chs )
 {
     if (!chs.isEmpty())
-        m_decoder->setEncoding(chs.latin1(), Decoder::EncodingFromHTTPHeader);
+        m_decoder->setEncodingName(chs.latin1(), Decoder::EncodingFromHTTPHeader);
 }
 
 void CachedXSLStyleSheet::data(ByteArray& data, bool eof)
index e05fdb2..422a835 100644 (file)
@@ -80,7 +80,7 @@
 #include <klocale.h>
 #include <kxmlcore/Assertions.h>
 #include <qptrlist.h>
-#include <qtextcodec.h>
+#include "TextEncoding.h"
 #include <sys/types.h>
 #include <math.h>
 
@@ -639,15 +639,15 @@ void Frame::begin( const KURL &url, int xOffset, int yOffset )
     d->m_view->resizeContents( 0, 0 );
 }
 
-void Frame::write( const char *str, int len )
+void Frame::write(const char* str, int len)
 {
     if ( !d->m_decoder ) {
         d->m_decoder = new Decoder;
         if (!d->m_encoding.isNull())
-            d->m_decoder->setEncoding(d->m_encoding.latin1(),
+            d->m_decoder->setEncodingName(d->m_encoding.latin1(),
                 d->m_haveEncoding ? Decoder::UserChosenEncoding : Decoder::EncodingFromHTTPHeader);
         else
-            d->m_decoder->setEncoding(settings()->encoding().latin1(), Decoder::DefaultEncoding);
+            d->m_decoder->setEncodingName(settings()->encoding().latin1(), Decoder::DefaultEncoding);
 
         if (d->m_doc)
             d->m_doc->setDecoder(d->m_decoder.get());
@@ -750,7 +750,7 @@ void Frame::gotoAnchor()
         // Decoding here has to match encoding in completeURL, which means it has to use the
         // page's encoding rather than UTF-8.
         if (d->m_decoder)
-            gotoAnchor(KURL::decode_string(ref, d->m_decoder->codec()));
+            gotoAnchor(KURL::decode_string(ref, d->m_decoder->encoding()));
     }
 }
 
@@ -1014,13 +1014,13 @@ void Frame::receivedRedirect(TransferJob*, const KURL& url)
 
 QString Frame::encoding() const
 {
-    if(d->m_haveEncoding && !d->m_encoding.isEmpty())
+    if (d->m_haveEncoding && !d->m_encoding.isEmpty())
         return d->m_encoding;
 
-    if(d->m_decoder && d->m_decoder->encoding())
-        return QString(d->m_decoder->encoding());
+    if (d->m_decoder && d->m_decoder->encoding().isValid())
+        return QString(d->m_decoder->encodingName());
 
-    return(settings()->encoding());
+    return settings()->encoding();
 }
 
 void Frame::setUserStyleSheet(const KURL& url)
@@ -3215,11 +3215,8 @@ QChar Frame::backslashAsCurrencySymbol() const
     Decoder *decoder = doc->decoder();
     if (!decoder)
         return '\\';
-    const QTextCodec *codec = decoder->codec();
-    if (!codec)
-        return '\\';
 
-    return codec->backslashAsCurrencySymbol();
+    return decoder->encoding().backslashAsCurrencySymbol();
 }
 
 bool Frame::markedTextUsesUnderlines() const
diff --git a/WebCore/platform/CharsetNames.cpp b/WebCore/platform/CharsetNames.cpp
new file mode 100644 (file)
index 0000000..593128b
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Computer, Inc.  All rights reserved.
+ * Copyright (C) 2005 Alexey Proskuryakov <ap@nypop.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "CharsetNames.h"
+
+#include <kxmlcore/HashMap.h>
+#include <unicode/ucnv.h>
+#include <unicode/utypes.h>
+
+#if __APPLE__
+#include "ExtraCFEncodings.h"
+#endif
+
+namespace KXMLCore {
+
+    template<> struct DefaultHash<WebCore::TextEncodingID> {
+        typedef PtrHash<WebCore::TextEncodingID> Hash;
+    };
+
+    template<> struct HashTraits<WebCore::TextEncodingID> : GenericHashTraits<WebCore::TextEncodingID> {
+        static TraitType deletedValue() { return WebCore::InvalidEncoding; }
+    };
+    
+} // namespace KXMLCore
+
+namespace WebCore {
+
+struct CharsetEntry {
+    const char* name;
+    TextEncodingID encoding;
+    int flags; // actually TextEncodingFlags
+};
+
+// The following autogenerated file includes the charset data.
+#include "CharsetData.cpp"
+
+// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
+// or anything like that.
+const unsigned PHI = 0x9e3779b9U;
+
+struct EncodingHash {
+    // Names match case-insensitively with non-alphanumeric characters ignored. Bytes go
+    // through unsigned char first: negative values are undefined for isalnum()/tolower().
+    static bool equal(const char* s1, const char* s2)
+    {
+        unsigned char c1;
+        unsigned char c2;
+        do {
+            do {
+                c1 = static_cast<unsigned char>(*s1++);
+            } while (c1 && !isalnum(c1));
+            do {
+                c2 = static_cast<unsigned char>(*s2++);
+            } while (c2 && !isalnum(c2));
+
+            if (tolower(c1) != tolower(c2))
+                return false;
+        } while (c1 && c2);
+
+        return !c1 && !c2;
+    }
+
+    // Mixes in at most the first 16 alphanumeric characters, lowercased, so it agrees with
+    // equal(). Algorithm from http://burtleburtle.net/bob/hash/hashfaq.html and
+    // http://burtleburtle.net/bob/hash/doobs.html
+    static unsigned hash(const char* s)
+    {
+        unsigned h = PHI;
+
+        for (int i = 0; i != 16; ++i) {
+            unsigned char c;
+            do {
+                c = static_cast<unsigned char>(*s++);
+            } while (c && !isalnum(c));
+            if (!c)
+                break;
+            h += tolower(c);
+            h += (h << 10);
+            h ^= (h >> 6);
+        }
+
+        h += (h << 3);
+        h ^= (h >> 11);
+        h += (h << 15);
+
+        return h;
+    }
+};
+
+static HashMap<const char*, const CharsetEntry*, EncodingHash>* nameMap;
+static HashMap<TextEncodingID, const CharsetEntry*>* encodingMap;
+
+// Fills both lookup maps from `table`, stopping at the first entry whose name is null.
+static void buildDictionaries()
+{
+    nameMap = new HashMap<const char*, const CharsetEntry*, EncodingHash>();
+    encodingMap = new HashMap<TextEncodingID, const CharsetEntry*>();
+    for (const CharsetEntry* entry = table; entry->name; ++entry) {
+        nameMap->add(entry->name, entry);
+        encodingMap->add(entry->encoding, entry);
+    }
+}
+
+// Looks up a charset name (matched as EncodingHash defines) and returns its encoding ID,
+// or InvalidEncoding when the name is null or unknown. *flags, if given, is always set.
+TextEncodingID textEncodingIDFromCharsetName(const char* name, TextEncodingFlags* flags)
+{
+    if (!nameMap)
+        buildDictionaries();
+
+    // EncodingHash dereferences its key, so a null name must never reach the map.
+    const CharsetEntry* entry = name ? nameMap->get(name) : 0;
+    if (name && !entry) {
+        // Not in the table under this spelling: retry with ICU's IANA standard name.
+        UErrorCode err = U_ZERO_ERROR;
+        name = ucnv_getStandardName(name, "IANA", &err);
+        if (name)
+            entry = nameMap->get(name);
+    }
+
+    if (flags)
+        *flags = entry ? static_cast<TextEncodingFlags>(entry->flags) : NoEncodingFlags;
+    return entry ? entry->encoding : InvalidEncoding;
+}
+
+// Returns the name recorded for an encoding ID, or 0 when the ID has no table entry.
+const char* charsetNameFromTextEncodingID(TextEncodingID encoding)
+{
+    if (!encodingMap)
+        buildDictionaries();
+
+    if (const CharsetEntry* match = encodingMap->get(encoding))
+        return match->name;
+    return 0;
+}
+
+} // namespace WebCore
similarity index 71%
rename from WebCore/kwq/KWQCharsets.h
rename to WebCore/platform/CharsetNames.h
index 8b11b1e..105a987 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2003 Apple Computer, Inc.  All rights reserved.
+ * Copyright (C) 2003, 2006 Apple Computer, Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  */
 
-#ifndef KWQCHARSETS_H_
-#define KWQCHARSETS_H_
+#ifndef CharsetNames_H
+#define CharsetNames_H
 
-#if __APPLE__
-#include <CoreFoundation/CoreFoundation.h>
-#endif
+#include "TextEncoding.h"
 
-enum KWQEncodingFlags {
-    NoEncodingFlags = 0,
-    VisualOrdering = 1,
-    BigEndian = 2,
-    LittleEndian = 4,
-    IsJapanese = 8
-};
+namespace WebCore {
 
-#if __APPLE__
-CFStringEncoding KWQCFStringEncodingFromIANACharsetName(const char *, KWQEncodingFlags *flags = 0);
-const char *KWQCFStringEncodingToIANACharsetName(CFStringEncoding);
-#endif
+TextEncodingID textEncodingIDFromCharsetName(const char*, TextEncodingFlags* flags = 0);
+const char* charsetNameFromTextEncodingID(TextEncodingID);
 
-#endif /* KWQCHARSETS_H_ */
+} // namespace WebCore
+
+#endif // CharsetNames_H
index b489be9..7cd926f 100644 (file)
 
 #include <kxmlcore/Assertions.h>
 #include "KWQRegExp.h"
-#include "KWQTextCodec.h"
+#include "TextEncoding.h"
 #include <kxmlcore/Vector.h>
 #include <unicode/uidna.h>
 #include <utility>
 
 using std::pair;
 using std::make_pair;
+using namespace WebCore;
 
 typedef enum {
     // alpha 
@@ -197,8 +198,8 @@ static const unsigned char characterClassTable[256] = {
     /* 252 */ BadChar, /* 253 */ BadChar, /* 254 */ BadChar, /* 255 */ BadChar
 };
 
-static int copyPathRemovingDots(char *dst, const char *src, int srcStart, int srcEnd);
-static char *encodeRelativeString(const KURL &base, const QString &rel, const QTextCodec *codec);
+static int copyPathRemovingDots(char* dst, const char* src, int srcStart, int srcEnd);
+static char* encodeRelativeString(const KURL &base, const QString& rel, const WebCore::TextEncoding& encoding);
 static QString substituteBackslashes(const QString &string);
 
 static inline bool isSchemeFirstChar(unsigned char c) { return characterClassTable[c] & SchemeFirstChar; }
@@ -257,7 +258,7 @@ KURL::KURL(const QString &url)
         parse(url.ascii(), &url);
 }
 
-KURL::KURL(const KURL &base, const QString &relative, const QTextCodec *codec)
+KURL::KURL(const KURL &base, const QString &relative, const WebCore::TextEncoding& encoding)
 {
     // Allow at lest absolute URLs to resolve against an empty URL.
     if (!base.m_isValid && !base.isEmpty()) {
@@ -283,7 +284,7 @@ KURL::KURL(const KURL &base, const QString &relative, const QTextCodec *codec)
         strBuffer = 0;
         str = rel.ascii();
     } else {
-        strBuffer = encodeRelativeString(base, rel, codec);
+        strBuffer = encodeRelativeString(base, rel, encoding);
         str = strBuffer;
     }
     
@@ -722,9 +723,9 @@ QString KURL::prettyURL() const
     return result;
 }
 
-QString KURL::decode_string(const QString &urlString, const QTextCodec *codec)
+QString KURL::decode_string(const QString& urlString, const WebCore::TextEncoding& encoding)
 {
-    static const QTextCodec* UTF8Codec = QTextCodec::utf8Codec();
+    static const WebCore::TextEncoding utf8Encoding(UTF8Encoding);
 
     QString result("");
 
@@ -763,7 +764,7 @@ QString KURL::decode_string(const QString &urlString, const QTextCodec *codec)
         }
 
         // Decode the bytes into Unicode characters.
-        QString decoded = (codec ? codec : UTF8Codec)->toUnicode(buffer, p - buffer);
+        QString decoded = (encoding.isValid() ? encoding : utf8Encoding).toUnicode(buffer, p - buffer);
         if (decoded.isEmpty()) {
             continue;
         }
@@ -1413,22 +1414,22 @@ static QString encodeHostnames(const QString &s)
     return s;
 }
 
-static char *encodeRelativeString(const KURL &base, const QString &rel, const QTextCodec *codec)
+static char *encodeRelativeString(const KURL &base, const QString &rel, const WebCore::TextEncoding& encoding)
 {
     QString s = encodeHostnames(rel);
 
     char *strBuffer;
 
-    static const QTextCodec* UTF8Codec = QTextCodec::utf8Codec();
+    static const WebCore::TextEncoding utf8Encoding(UTF8Encoding);
 
-    const QTextCodec *pathCodec = codec ? codec : UTF8Codec;
-    const QTextCodec *otherCodec = pathCodec;
+    WebCore::TextEncoding pathEncoding = encoding.isValid() ? encoding : utf8Encoding;
+    WebCore::TextEncoding otherEncoding = pathEncoding;
     
     // Always use UTF-8 for mailto URLs because that's what mail applications expect.
     // Always use UTF-8 for paths in file and help URLs, since they are local filesystem paths,
     // and help content is often defined with this in mind, but use native encoding for the
     // non-path parts of the URL.
-    if (pathCodec != UTF8Codec) {
+    if (pathEncoding != utf8Encoding) {
         QString protocol;
         if (rel.length() > 0 && isSchemeFirstChar(rel.at(0).latin1())) {
             for (uint i = 1; i < rel.length(); i++) {
@@ -1447,26 +1448,26 @@ static char *encodeRelativeString(const KURL &base, const QString &rel, const QT
         }
         protocol = protocol.lower();
         if (protocol == "file" || protocol == "help") {
-            pathCodec = UTF8Codec;
+            pathEncoding = utf8Encoding;
         } else if (protocol == "mailto") {
-            pathCodec = UTF8Codec;
-            otherCodec = UTF8Codec;
+            pathEncoding = utf8Encoding;
+            otherEncoding = utf8Encoding;
         }
     }
     
     int pathEnd = -1;
-    if (*pathCodec != *otherCodec) {
+    if (pathEncoding != otherEncoding) {
         pathEnd = s.find(QRegExp("[?#]"));
     }
     if (pathEnd == -1) {
-        QCString decoded = pathCodec->fromUnicode(s);
+        QCString decoded = pathEncoding.fromUnicode(s);
         int decodedLength = decoded.length();
         strBuffer = static_cast<char *>(fastMalloc(decodedLength + 1));
         memcpy(strBuffer, decoded, decodedLength);
         strBuffer[decodedLength] = 0;
     } else {
-        QCString pathDecoded = pathCodec->fromUnicode(s.left(pathEnd));
-        QCString otherDecoded = otherCodec->fromUnicode(s.mid(pathEnd));
+        QCString pathDecoded = pathEncoding.fromUnicode(s.left(pathEnd));
+        QCString otherDecoded = otherEncoding.fromUnicode(s.mid(pathEnd));
         int pathDecodedLength = pathDecoded.length();
         int otherDecodedLength = otherDecoded.length();
         strBuffer = static_cast<char *>(fastMalloc(pathDecodedLength + otherDecodedLength + 1));
index 1182aa0..f025acb 100644 (file)
@@ -27,8 +27,7 @@
 #define KURL_H_
 
 #include "QString.h"
-
-class QTextCodec;
+#include "TextEncoding.h"
 
 #if __APPLE__
 #ifdef __OBJC__
@@ -43,11 +42,11 @@ class NSURL;
 class KURL {
 public:
     KURL();
-    KURL(const char *);
-    KURL(const KURL &, const QString &, const QTextCodec * = 0);
-    KURL(const QString &);
+    KURL(const char*);
+    KURL(const KURL&, const QString&, const WebCore::TextEncoding& encoding = WebCore::TextEncoding(WebCore::UTF8Encoding));
+    KURL(const QString&);
 #if __APPLE__
-    KURL(NSURL *);
+    KURL(NSURL*);
 #endif
     
     bool isEmpty() const { return urlString.isEmpty(); } 
@@ -87,7 +86,7 @@ public:
 
     bool isLocalFile() const;
 
-    static QString decode_string(const QString &, const QTextCodec * = 0);
+    static QString decode_string(const QString &, const WebCore::TextEncoding& encoding = WebCore::TextEncoding(WebCore::UTF8Encoding));
     static QString encode_string(const QString &);
     
     friend bool operator==(const KURL &, const KURL &);
index e5591fd..f8367ea 100644 (file)
@@ -28,7 +28,7 @@
 
 #include "KWQLogging.h"
 #include "KWQRegExp.h"
-#include "KWQTextCodec.h"
+#include "TextEncoding.h"
 #include <kjs/dtoa.h>
 #include <stdio.h>
 #include <stdarg.h>
@@ -2636,3 +2636,13 @@ void freeHandle(KWQStringData **_free)
         freeNodeAllocationPages = pageNode;
     }
 }
+
+QString QString::fromUtf8(const char *chs) // Decodes a NUL-terminated UTF-8 C string.
+{
+    return WebCore::TextEncoding(WebCore::UTF8Encoding).toUnicode(chs, strlen(chs)); // length comes from strlen(), so chs must not be null
+}
+
+QString QString::fromUtf8(const char *chs, int len) // Decodes the first len bytes at chs as UTF-8.
+{
+    return WebCore::TextEncoding(WebCore::UTF8Encoding).toUnicode(chs, len);
+}
diff --git a/WebCore/platform/StreamingTextDecoder.cpp b/WebCore/platform/StreamingTextDecoder.cpp
new file mode 100644 (file)
index 0000000..ad101ae
--- /dev/null
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "StreamingTextDecoder.h"
+
+#include <kxmlcore/Assertions.h>
+
+namespace WebCore {
+
+StreamingTextDecoder::StreamingTextDecoder(const TextEncoding& encoding)
+    : m_encoding(encoding)
+    , m_littleEndian(encoding.flags() & LittleEndian) // byte order read by convertUTF16()
+    , m_atStart(true) // toUnicode() still has to look for a BOM
+    , m_error(false)
+    , m_numBufferedBytes(0)
+    , m_converterICU(0) // opened on demand by createICUConverter()
+{
+}
+
+QString StreamingTextDecoder::convertLatin1(const unsigned char* s, int length) // Builds the QString directly from the bytes; no ICU converter is used.
+{
+    ASSERT(m_numBufferedBytes == 0); // expects no leftover bytes from an earlier call
+    return QString(reinterpret_cast<const char *>(s), length);
+}
+
+static const UChar replacementCharacter = 0xFFFD;
+static const UChar BOM = 0xFEFF;
+static const int ConversionBufferSize = 16384;
+    
+static UConverter* cachedConverterICU;
+static TextEncodingID cachedConverterEncoding = InvalidEncoding;
+
+StreamingTextDecoder::~StreamingTextDecoder() // Hands this decoder's ICU converter to the one-entry file-level cache instead of closing it.
+{
+    if (m_converterICU) {
+        if (cachedConverterICU != 0)
+            ucnv_close(cachedConverterICU); // evict the converter cached previously
+        cachedConverterICU = m_converterICU;
+        cachedConverterEncoding = m_encoding.encodingID(); // NOTE(review): createICUConverter() compares against the effective encoding ID - confirm the two always agree
+    }
+}
+
+QString StreamingTextDecoder::convertUTF16(const unsigned char *s, int length)
+{
+    ASSERT(m_numBufferedBytes == 0 || m_numBufferedBytes == 1);
+
+    const unsigned char *p = s;
+    unsigned len = length;
+    
+    QString result("");
+    
+    result.reserve(length / 2);
+
+    if (m_numBufferedBytes != 0 && len != 0) {
+        ASSERT(m_numBufferedBytes == 1);
+        UChar c;
+        if (m_littleEndian)
+            c = m_bufferedBytes[0] | (p[0] << 8);
+        else
+            c = (m_bufferedBytes[0] << 8) | p[0];
+
+        if (c)
+            result.append(reinterpret_cast<QChar *>(&c), 1);
+
+        m_numBufferedBytes = 0;
+        p += 1;
+        len -= 1;
+    }
+    
+    while (len > 1) {
+        UChar buffer[ConversionBufferSize];
+        int runLength = MIN(len / 2, sizeof(buffer) / sizeof(buffer[0]));
+        int bufferLength = 0;
+        if (m_littleEndian) {
+            for (int i = 0; i < runLength; ++i) {
+                UChar c = p[0] | (p[1] << 8);
+                p += 2;
+                if (c && c != BOM)
+                    buffer[bufferLength++] = c;
+            }
+        } else {
+            for (int i = 0; i < runLength; ++i) {
+                UChar c = (p[0] << 8) | p[1];
+                p += 2;
+                if (c && c != BOM)
+                    buffer[bufferLength++] = c;
+            }
+        }
+        result.append(reinterpret_cast<QChar *>(buffer), bufferLength);
+        len -= runLength * 2;
+    }
+    
+    if (len) {
+        ASSERT(m_numBufferedBytes == 0);
+        m_numBufferedBytes = 1;
+        m_bufferedBytes[0] = p[0];
+    }
+    
+    return result;
+}
+
+static inline TextEncoding effectiveEncoding(const TextEncoding& encoding) // Returns the encoding actually used for decoding; the flags are carried over unchanged.
+{
+    TextEncodingID id = encoding.encodingID();
+    if (id == Latin1Encoding || id == ASCIIEncoding)
+        id = WinLatin1Encoding; // Latin-1 and ASCII are both decoded as Windows Latin-1
+    return TextEncoding(id, encoding.flags());
+}
+
+UErrorCode StreamingTextDecoder::createICUConverter()
+{
+    TextEncoding encoding = effectiveEncoding(m_encoding);
+    const char* encodingName = encoding.name();
+
+    bool cachedEncodingEqual = cachedConverterEncoding == encoding.encodingID();
+    cachedConverterEncoding = InvalidEncoding;
+
+    if (cachedEncodingEqual && cachedConverterICU) {
+        m_converterICU = cachedConverterICU;
+        cachedConverterICU = 0;
+    } else {    
+        UErrorCode err = U_ZERO_ERROR;
+        ASSERT(!m_converterICU);
+        m_converterICU = ucnv_open(encodingName, &err);
+        if (err == U_AMBIGUOUS_ALIAS_WARNING)
+            LOG_ERROR("ICU ambiguous alias warning for encoding: %s", encodingName);
+
+        if (!m_converterICU) {
+            LOG_ERROR("the ICU Converter won't convert from text encoding 0x%X, error %d", encoding, err);
+            return err;
+        }
+    }
+    
+    return U_ZERO_ERROR;
+}
+
+// We strip replacement characters because the ICU converter for UTF-8 converts
+// invalid sequences into replacement characters, but other browsers discard them.
+// We strip BOM characters because they can show up both at the start of content
+// and inside content, and we never want them to end up in the decoded text.
+static inline bool unwanted(UChar c)
+{
+    return c == replacementCharacter || c == BOM; // U+FFFD or U+FEFF
+}
+
+void StreamingTextDecoder::appendOmittingUnwanted(QString &s, const UChar *characters, int byteCount) // Appends characters to s, skipping every code unit for which unwanted() is true.
+{
+    ASSERT(byteCount % sizeof(UChar) == 0); // byteCount is a size in bytes, not a character count
+    int start = 0; // index where the current run of wanted characters begins
+    int characterCount = byteCount / sizeof(UChar);
+    for (int i = 0; i != characterCount; ++i) {
+        if (unwanted(characters[i])) {
+            if (start != i)
+                s.append(reinterpret_cast<const QChar *>(&characters[start]), i - start); // append the run that ends just before i
+            start = i + 1;
+        }
+    }
+    if (start != characterCount)
+        s.append(reinterpret_cast<const QChar *>(&characters[start]), characterCount - start); // append the trailing run
+}
+
+QString StreamingTextDecoder::convertUsingICU(const unsigned char *chs, int len, bool flush)
+{
+    // Get a converter for the passed-in encoding.
+    if (!m_converterICU && U_FAILURE(createICUConverter()))
+        return QString();
+
+    ASSERT(_converterICU);
+
+    QString result("");
+    result.reserve(len);
+
+    UChar buffer[ConversionBufferSize];
+    const char *source = reinterpret_cast<const char *>(chs);
+    const char *sourceLimit = source + len;
+    int32_t *offsets = NULL;
+    UErrorCode err;
+    
+    do {
+        UChar *target = buffer;
+        const UChar *targetLimit = target + ConversionBufferSize;
+        err = U_ZERO_ERROR;
+        ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
+        int count = target - buffer;
+        appendOmittingUnwanted(result, reinterpret_cast<const UChar *>(buffer), count * sizeof(UChar));
+    } while (err == U_BUFFER_OVERFLOW_ERROR);
+
+    if (U_FAILURE(err)) {
+        // flush the converter so it can be reused, and not be bothered by this error.
+        do {
+            UChar *target = buffer;
+            const UChar *targetLimit = target + ConversionBufferSize;
+            err = U_ZERO_ERROR;
+            ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, true, &err);
+        } while (source < sourceLimit);
+        LOG_ERROR("ICU conversion error");
+        return QString();
+    }
+    
+    return result;
+}
+
+QString StreamingTextDecoder::convert(const unsigned char *chs, int len, bool flush) // Dispatches to the decoder that matches m_encoding.
+{
+    //#define PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE 1000
+
+    switch (m_encoding.encodingID()) {
+    case Latin1Encoding:
+    case WinLatin1Encoding:
+        return convertLatin1(chs, len); // flush is not needed on this path
+
+    case UTF16Encoding:
+        return convertUTF16(chs, len); // flush is not forwarded on this path either
+
+    default:
+#if PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE
+        QString result; // test mode: feed ICU in small chunks to exercise partial-character handling
+        int chunkSize;
+        for (int i = 0; i != len; i += chunkSize) {
+            chunkSize = len - i;
+            if (chunkSize > PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE) {
+                chunkSize = PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE;
+            }
+            result += convertUsingICU(chs + i, chunkSize, flush && (i + chunkSize == len)); // only the last chunk carries the flush request
+        }
+        return result;
+#else
+        return convertUsingICU(chs, len, flush);
+#endif
+    }
+    ASSERT_NOT_REACHED();
+    return QString();
+}
+
+QString StreamingTextDecoder::toUnicode(const char *chs, int len, bool flush) // Decodes the next chunk of the stream; the first bytes of the stream are checked for a BOM.
+{
+    ASSERT_ARG(len, len >= 0);
+    
+    if (m_error || !chs)
+        return QString();
+
+    if (len <= 0 && !flush)
+        return "";
+
+    // Handle normal case.
+    if (!m_atStart)
+        return convert(chs, len, flush);
+
+    // Check to see if we found a BOM.
+    int numBufferedBytes = m_numBufferedBytes;
+    int buf1Len = numBufferedBytes;
+    int buf2Len = len;
+    const unsigned char *buf1 = m_bufferedBytes;
+    const unsigned char *buf2 = reinterpret_cast<const unsigned char *>(chs);
+    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; // next byte: buffered bytes first, then chs, else 0
+    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+    int BOMLength = 0;
+    if (c1 == 0xFF && c2 == 0xFE) { // UTF-16 little-endian BOM
+        m_encoding = TextEncoding(UTF16Encoding, LittleEndian);
+        m_littleEndian = true;
+        BOMLength = 2;
+    } else if (c1 == 0xFE && c2 == 0xFF) { // UTF-16 big-endian BOM
+        m_encoding = TextEncoding(UTF16Encoding, BigEndian);
+        m_littleEndian = false;
+        BOMLength = 2;
+    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { // UTF-8 BOM
+        m_encoding = TextEncoding(UTF8Encoding);
+        BOMLength = 3;
+    }
+
+    // Handle case where we found a BOM.
+    if (BOMLength != 0) {
+        ASSERT(numBufferedBytes + len >= BOMLength);
+        int skip = BOMLength - numBufferedBytes; // BOM bytes that sit at the front of chs
+        m_numBufferedBytes = 0;
+        m_atStart = false;
+        return len == skip ? QString("") : convert(chs + skip, len - skip, flush);
+    }
+
+    // Handle case where we know there is no BOM coming.
+    const int bufferSize = sizeof(m_bufferedBytes);
+    if (numBufferedBytes + len > bufferSize || flush) {
+        m_atStart = false;
+        if (numBufferedBytes == 0) {
+            return convert(chs, len, flush);
+        }
+        unsigned char bufferedBytes[sizeof(m_bufferedBytes)]; // local copy, because m_numBufferedBytes is reset before convert() runs
+        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
+        m_numBufferedBytes = 0;
+        return convert(bufferedBytes, numBufferedBytes, false) + convert(chs, len, flush);
+    }
+
+    // Continue to look for the BOM.
+    memcpy(&m_bufferedBytes[numBufferedBytes], chs, len); // fits: the check above returned when numBufferedBytes + len > bufferSize
+    m_numBufferedBytes += len;
+    return "";
+}
+    
+} // namespace WebCore
diff --git a/WebCore/platform/StreamingTextDecoder.h b/WebCore/platform/StreamingTextDecoder.h
new file mode 100644 (file)
index 0000000..364320c
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef StreamingTextDecoder_H
+#define StreamingTextDecoder_H
+
+#include "TextEncoding.h"
+#include <kxmlcore/Noncopyable.h>
+#include <unicode/ucnv.h>
+#include <unicode/utypes.h>
+
+namespace WebCore {
+
+    class StreamingTextDecoder : Noncopyable {
+    public:
+        StreamingTextDecoder(const TextEncoding&);
+        ~StreamingTextDecoder();
+        
+        QString toUnicode(const char* chs, int len, bool flush = false);
+        
+    private:
+        QString convert(const char *chs, int len, bool flush)
+            { return convert(reinterpret_cast<const unsigned char *>(chs), len, flush); }
+        QString convert(const unsigned char *chs, int len, bool flush);
+        QString convertLatin1(const unsigned char *chs, int len);
+        QString convertUTF16(const unsigned char *chs, int len);
+        
+        // ICU decoding.
+        QString convertUsingICU(const unsigned char *chs, int len, bool flush);
+        UErrorCode createICUConverter();
+
+        static void appendOmittingUnwanted(QString &s, const UniChar *characters, int byteCount);
+
+        TextEncoding m_encoding;
+        bool m_littleEndian;
+        bool m_atStart;
+        bool m_error;
+        
+        unsigned m_numBufferedBytes;
+        unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+        
+        // ICU decoding.
+        UConverter *m_converterICU;
+    };
+    
+} // namespace WebCore
+
+#endif // StreamingTextDecoder_H
diff --git a/WebCore/platform/TextEncoding.cpp b/WebCore/platform/TextEncoding.cpp
new file mode 100644 (file)
index 0000000..a193a5d
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextEncoding.h"
+
+#include "CharsetNames.h"
+#include <kxmlcore/Assertions.h>
+#include <kxmlcore/HashSet.h>
+#include "StreamingTextDecoder.h"
+
+namespace WebCore {
+
+const UniChar replacementCharacter = 0xFFFD;
+const UniChar BOM = 0xFEFF;
+
+static const int ConversionBufferSize = 16384;
+
+TextEncoding::TextEncoding(const char* name, bool eightBitOnly) // Looks up the encoding ID and flags for a charset name.
+{
+    m_encodingID = textEncodingIDFromCharsetName(name, &m_flags);
+    if (eightBitOnly && m_encodingID == UTF16Encoding)
+        m_encodingID = UTF8Encoding; // UTF-16 is replaced by UTF-8 when the caller asks for eightBitOnly
+}
+
+const char* TextEncoding::name() const // Charset name for this encoding's ID.
+{
+    return charsetNameFromTextEncodingID(m_encodingID);
+}
+
+inline TextEncodingID effectiveEncoding(TextEncodingID encoding) // NOTE(review): no caller is visible in this file - confirm it is still needed here
+{
+    if (encoding == Latin1Encoding || encoding == ASCIIEncoding)
+        return WinLatin1Encoding; // Latin-1 and ASCII map to Windows Latin-1
+    return encoding;
+}
+
+QChar TextEncoding::backslashAsCurrencySymbol() const // Character to use for a backslash under this encoding.
+{
+    if (m_flags & BackslashIsYen)
+        return 0x00A5; // yen sign
+    return '\\';
+}
+
+QString TextEncoding::toUnicode(const char *chs, int len) const // One-shot decode of len bytes using a temporary StreamingTextDecoder.
+{
+    return StreamingTextDecoder(*this).toUnicode(chs, len, true); // flush = true: this is the whole input
+}
+
+QString TextEncoding::toUnicode(const ByteArray &qba, int len) const // One-shot decode of len bytes taken from a ByteArray.
+{
+    return StreamingTextDecoder(*this).toUnicode(qba, len, true); // flush = true: this is the whole input
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/TextEncoding.h b/WebCore/platform/TextEncoding.h
new file mode 100644 (file)
index 0000000..e45483a
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextEncoding_H
+#define TextEncoding_H
+
+#include "QString.h"
+#include "KWQCString.h"
+
+namespace WebCore {
+
+    class StreamingTextDecoder;
+
+#ifdef __APPLE__
+    typedef CFStringEncoding TextEncodingID;
+    
+    const TextEncodingID InvalidEncoding = kCFStringEncodingInvalidId;
+    const TextEncodingID UTF8Encoding = kCFStringEncodingUTF8;
+    const TextEncodingID UTF16Encoding = kCFStringEncodingUnicode;
+    const TextEncodingID Latin1Encoding = kCFStringEncodingISOLatin1;
+    const TextEncodingID ASCIIEncoding = kCFStringEncodingASCII;
+    const TextEncodingID WinLatin1Encoding = kCFStringEncodingWindowsLatin1;
+#else
+    enum TextEncodingID {
+        InvalidEncoding = -1,
+        UTF8Encoding,
+        UTF16Encoding,
+        Latin1Encoding,
+        ASCIIEncoding,
+        WinLatin1Encoding
+    };
+#endif
+
+    enum TextEncodingFlags { // Distinct bit values; the code tests them with &.
+        NoEncodingFlags = 0,
+        VisualOrdering = 1, // reported by TextEncoding::usesVisualOrdering()
+        BigEndian = 2,
+        LittleEndian = 4, // read by StreamingTextDecoder to pick the UTF-16 byte order
+        IsJapanese = 8, // reported by TextEncoding::isJapanese()
+        BackslashIsYen = 16 // makes backslashAsCurrencySymbol() return the yen sign
+    };
+
+    class TextEncoding { // Value type pairing an encoding ID with its TextEncodingFlags.
+    public:
+        enum { 
+            EightBitOnly = true // readable spelling for the eightBitOnly constructor argument
+        };
+
+        explicit TextEncoding(TextEncodingID encodingID, TextEncodingFlags flags = NoEncodingFlags) 
+            : m_encodingID(encodingID)
+            , m_flags(flags) 
+        { 
+        }
+
+        explicit TextEncoding(const char*, bool eightBitOnly = false); // looks the encoding up by charset name
+
+        bool isValid() const { return m_encodingID != InvalidEncoding; }
+        const char* name() const;
+        bool usesVisualOrdering() const { return m_flags & VisualOrdering; }
+        bool isJapanese() const { return m_flags & IsJapanese; }
+        
+        QChar backslashAsCurrencySymbol() const;
+        
+        QCString fromUnicode(const QString&, bool allowEntities = false) const; // encodes; defined per platform (TextEncodingMac.cpp in this change)
+
+        QString toUnicode(const char*, int length) const; // decodes a complete buffer in one call
+        QString toUnicode(const ByteArray&, int length) const;
+
+        TextEncodingID encodingID() const { return m_encodingID; }
+        TextEncodingFlags flags() const { return m_flags; }
+        
+    private:
+        TextEncodingID m_encodingID;
+        TextEncodingFlags m_flags;
+    };
+
+    inline bool operator==(const TextEncoding& a, const TextEncoding& b) // Equal only when both the ID and the flags match.
+    {
+        return a.encodingID() == b.encodingID() && a.flags() == b.flags();
+    }
+    
+    inline bool operator!=(const TextEncoding& a, const TextEncoding& b) // Differs when either the ID or the flags differ.
+    {
+        return a.encodingID() != b.encodingID() || a.flags() != b.flags();
+    }
+    
+} // namespace WebCore
+
+#endif // TextEncoding_H
diff --git a/WebCore/platform/mac/ExtraCFEncodings.h b/WebCore/platform/mac/ExtraCFEncodings.h
new file mode 100644 (file)
index 0000000..32145e0
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef ExtraCFEncodings_H
+#define ExtraCFEncodings_H
+
+// Until there's a CFString constant for these encodings, this works.
+// Since they are macros, they won't cause a compile failure even if the CFString constant is added.
+#define kCFStringEncodingBig5_DOSVariant (kTextEncodingBig5 | (kBig5_DOSVariant << 16))
+#define kCFStringEncodingEUC_CN_DOSVariant (kTextEncodingEUC_CN | (kEUC_CN_DOSVariant << 16))
+#define kCFStringEncodingEUC_KR_DOSVariant (kTextEncodingEUC_KR | (kEUC_KR_DOSVariant << 16))
+#define kCFStringEncodingISOLatin10 kTextEncodingISOLatin10
+#define kCFStringEncodingKOI8_U kTextEncodingKOI8_U
+#define kCFStringEncodingShiftJIS_DOSVariant (kTextEncodingShiftJIS | (kShiftJIS_DOSVariant << 16))
+
+#endif // ExtraCFEncodings_H
index f2718ca..d57c0cb 100644 (file)
@@ -28,7 +28,9 @@
 
 #import <kxmlcore/Assertions.h>
 #import <Foundation/Foundation.h>
-#import "KWQTextCodec.h"
+#import "TextEncoding.h"
+
+using namespace WebCore;
 
 void QString::setBufferFromCFString(CFStringRef cfs)
 {
@@ -100,12 +102,3 @@ QCString QString::utf8(int &length) const
     return qcs;
 }
 
-QString QString::fromUtf8(const char *chs)
-{
-    return QTextCodec(kCFStringEncodingUTF8).toUnicode(chs, strlen(chs));
-}
-
-QString QString::fromUtf8(const char *chs, int len)
-{
-    return QTextCodec(kCFStringEncodingUTF8).toUnicode(chs, len);
-}
diff --git a/WebCore/platform/mac/TextEncodingMac.cpp b/WebCore/platform/mac/TextEncodingMac.cpp
new file mode 100644 (file)
index 0000000..3961ea9
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextEncoding.h"
+
+#include <kxmlcore/Assertions.h>
+#include <kxmlcore/HashSet.h>
+#include "QString.h"
+#include "StreamingTextDecoder.h"
+
+namespace WebCore {
+
+static inline TextEncodingID effectiveEncoding(TextEncodingID encoding) // Maps a requested encoding ID to the one actually used for encoding.
+{
+    if (encoding == Latin1Encoding || encoding == ASCIIEncoding)
+        return WinLatin1Encoding; // Latin-1 and ASCII are both encoded as WinLatin1 (presumably because windows-1252 is a superset of both -- TODO confirm).
+    return encoding; // Every other encoding ID is passed through unchanged.
+}
+
+QCString TextEncoding::fromUnicode(const QString &qcs, bool allowEntities) const
+{
+    // Encodes qcs with CFStringGetBytes and returns the bytes; with allowEntities, each UTF-16 code unit that cannot be converted is written as a decimal "&#N;" reference, otherwise '?' is passed as the loss byte.
+    // FIXME: We should really use the same API in both directions. Currently we use ICU to decode and CFString to encode; it would be better to encode with ICU too.
+    
+    TextEncodingID encoding = effectiveEncoding(m_encodingID);
+
+    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
+    // Encoding will change the yen sign back into a backslash.
+    QString copy = qcs; // qcs is const, so the replacement below is done on a copy.
+    copy.replace(QChar('\\'), backslashAsCurrencySymbol());
+    CFStringRef cfs = copy.getCFString();
+    // Convert in runs: each loop pass encodes the characters CFStringGetBytes accepts, starting at startPos.
+    CFIndex startPos = 0;
+    CFIndex charactersLeft = CFStringGetLength(cfs);
+    QCString result(1); // for trailing zero
+
+    while (charactersLeft > 0) {
+        CFRange range = CFRangeMake(startPos, charactersLeft);
+        CFIndex bufferLength;
+        CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, NULL, 0x7FFFFFFF, &bufferLength); // Sizing pass: no output buffer, only bufferLength is used.
+        // Real conversion into a buffer sized from the first pass, plus one byte for the terminating zero.
+        QCString chunk(bufferLength + 1);
+        CFIndex charactersConverted = CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, reinterpret_cast<unsigned char *>(chunk.data()), bufferLength, &bufferLength);
+        chunk[bufferLength] = 0;
+        result.append(chunk);
+        // Stopping short of the end is treated as an unencodable character at that index: emit it as a numeric entity and step over it.
+        if (charactersConverted != charactersLeft) {
+            // FIXME: support surrogate pairs
+            UniChar badChar = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted);
+            char buf[16];
+            sprintf(buf, "&#%u;", badChar);
+            result.append(buf);
+            
+            ++charactersConverted; // Count the entity-encoded character as consumed.
+        }
+        
+        startPos += charactersConverted;
+        charactersLeft -= charactersConverted;
+    }
+    return result;
+}
+
+} // namespace WebCore
similarity index 97%
rename from WebCore/kwq/mac-encodings.txt
rename to WebCore/platform/mac/mac-encodings.txt
index 6d82edf..3df6f14 100644 (file)
@@ -113,7 +113,7 @@ JIS_X0208_83, IsJapanese: JIS_X0208-1983
 JIS_X0208_90, IsJapanese: JIS_X0208-1990
 JIS_X0212_90, IsJapanese: JIS_X0212-1990
 JIS_C6226_78, IsJapanese: JIS_C6226-1978
-ShiftJIS_X0213_00, IsJapanese: Shift_JIS_X0213-2000
+ShiftJIS_X0213_00, IsJapanese|BackslashIsYen: Shift_JIS_X0213-2000
 GB_2312_80
 GBK_95
 GB_18030_2000: GB18030
@@ -131,7 +131,7 @@ ISO_2022_CN: ISO-2022-CN
 ISO_2022_CN_EXT: ISO-2022-CN-EXT
 ISO_2022_KR: ISO-2022-KR
 
-EUC_JP, IsJapanese: EUC-JP, xeuc, xeucjp
+EUC_JP, IsJapanese|BackslashIsYen: EUC-JP, xeuc, xeucjp
 EUC_CN_DOSVariant: EUC-CN, cngb, csgb231280, gb2312, gb231280, gbk, xeuccn, xgbk
 EUC_TW: EUC-TW
 EUC_KR_DOSVariant: EUC-KR, cp949, ksc5601
similarity index 68%
rename from WebCore/kwq/make-charset-table.pl
rename to WebCore/platform/make-charset-table.pl
index ca4b0c6..a233d32 100755 (executable)
@@ -20,38 +20,40 @@ sub error ($)
 
 sub emit_line
 {
-    my ($name, $encoding, $flags) = @_;
+    my ($name, $prefix, $encoding, $flags) = @_;
  
     error "$name shows up twice in output" if $namesWritten{$name};
     $namesWritten{$name} = 1;
     
-    $output .= "    { \"$name\", kCFStringEncoding$encoding, $flags },\n";
+    $output .= "    { \"$name\", $prefix$encoding, $flags },\n";
 }
 
-sub process_mac_encodings
+sub process_platform_encodings
 {
-    my ($filename) = @_;
+    my ($filename, $PlatformPrefix) = @_;
+    my $baseFilename = $filename;
+    $baseFilename =~ s|.*/||;
     
-    my %seenMacNames;
+    my %seenPlatformNames;
     my %seenIANANames;
     
-    open MAC_ENCODINGS, $filename or die;
+    open PLATFORM_ENCODINGS, $filename or die;
     
-    while (<MAC_ENCODINGS>) {
+    while (<PLATFORM_ENCODINGS>) {
         chomp;
         s/\#.*$//;
         s/\s+$//;
-       if (my ($MacName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
+       if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
             my %aliases;
             
-            my $MacNameWithFlags = $MacName;
+            my $PlatformNameWithFlags = $PlatformName;
             if ($flags) {
-                $MacNameWithFlags .= ", " . $flags;
+                $PlatformNameWithFlags .= ", " . $flags;
             } else {
                 $flags = "NoEncodingFlags";
             }
-            error "CFString encoding name $MacName is mentioned twice in mac-encodings.txt" if $seenMacNames{$MacNameWithFlags};
-            $seenMacNames{$MacNameWithFlags} = 1;
+            error "CFString encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
+            $seenPlatformNames{$PlatformNameWithFlags} = 1;
 
             # Build the aliases list.
             # Also check that no two names are part of the same entry in the charsets file.
@@ -62,17 +64,17 @@ sub process_mac_encodings
             for my $name (@IANANames) {
                 if ($firstName eq "") {
                     if ($name !~ /^[-A-Za-z0-9_]+$/) {
-                        error "$name, in mac-encodings.txt, has illegal characters in it";
+                        error "$name, in $baseFilename, has illegal characters in it";
                         next;
                     }
                     $firstName = $name;
                 } else {
                     if ($name !~ /^[a-z0-9]+$/) {
-                        error "$name, in mac-encodings.txt, has illegal characters in it (must be all lowercase alphanumeric)";
+                        error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
                         next;
                     }
                     if ($name le $prevName) {
-                        error "$name comes after $prevName in mac-encodings.txt, but everything must be in alphabetical order";
+                        error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
                     }
                     $prevName = $name;
                 }
@@ -82,7 +84,7 @@ sub process_mac_encodings
                 
                 $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
                 
-                error "$name is mentioned twice in mac-encodings.txt" if $seenIANANames{$canonicalName};
+                error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
                 $seenIANANames{$canonicalName} = 1;
                 
                 $aliases{$canonicalName} = 1;
@@ -95,27 +97,27 @@ sub process_mac_encodings
                     if ($aliasesFromCharsetsFile{$otherName}
                         && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName}
                         && $canonicalName le $otherName) {
-                        error "mac-encodings.txt lists both $name and $otherName under $MacName, but that aliasing is already specified in character-sets.txt";
+                        error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
                     }
                 }
             }
             
             # write out
-            emit_line($firstName, $MacName, $flags);
+            emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
             for my $alias (sort keys %aliases) {
-                emit_line($alias, $MacName, $flags) if $alias ne $canonicalFirstName;
+                emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
             }
        } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
-            my $MacName = $1;
+            my $PlatformName = $1;
             
-            error "CFString encoding name $MacName is mentioned twice in mac-encodings.txt" if $seenMacNames{$MacName};
-            $seenMacNames{$MacName} = 1;
+            error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
+            $seenPlatformNames{$PlatformName} = 1;
         } elsif (/./) {
-            error "syntax error in mac-encodings.txt, line $.";
+            error "syntax error in platform-encodings.txt, line $.";
         }
     }
     
-    close MAC_ENCODINGS;
+    close PLATFORM_ENCODINGS;
 }
 
 sub process_iana_charset 
@@ -177,10 +179,10 @@ sub process_iana_charsets
 # Program body
 
 process_iana_charsets($ARGV[0]);
-process_mac_encodings($ARGV[1]);
+process_platform_encodings($ARGV[1], $ARGV[2]);
 
 exit 1 if $error;
 
 print "static const CharsetEntry table[] = {\n";
 print $output;
-print "    { 0, kCFStringEncodingInvalidId, NoEncodingFlags }\n};\n";
+print "    { 0, WebCore::InvalidEncoding, NoEncodingFlags }\n};\n";
diff --git a/WebCore/platform/win/win-encodings.txt b/WebCore/platform/win/win-encodings.txt
new file mode 100644 (file)
index 0000000..0b9bfc7
--- /dev/null
@@ -0,0 +1,12 @@
+# The items on the left are names of TextEncodingID values, optionally followed by a comma and encoding flags.
+# The items on the right are IANA character set names. Names listed in character-sets.txt are not
+# repeated here; mentioning any one character set from a group in there pulls in all the aliases in
+# that group.
+
+WinLatin1Encoding: windows-1252, winlatin1, xansi
+Latin1Encoding: ISO-8859-1, 88591
+ASCIIEncoding: US-ASCII, isoir6us
+UTF16Encoding: ISO-10646-UCS-2, ucs2, unicode, utf16
+UTF16Encoding, BigEndian: UTF-16BE, unicodefffe
+UTF16Encoding, LittleEndian: UTF-16LE, unicodefeff
+UTF8Encoding: UTF-8, unicode11utf8, unicode20utf8, xunicode20utf8
index 30d8f46..7dedfaa 100644 (file)
@@ -56,7 +56,7 @@
 #include "VisiblePosition.h"
 #include <qmatrix.h>
 #include <qpainter.h>
-#include <qtextcodec.h>
+#include "TextEncoding.h"
 #include <qtextstream.h>
 
 namespace WebCore {
@@ -2504,10 +2504,7 @@ QChar RenderObject::backslashAsCurrencySymbol() const
     Decoder *decoder = document->decoder();
     if (!decoder)
         return '\\';
-    const QTextCodec *codec = decoder->codec();
-    if (!codec)
-        return '\\';
-    return codec->backslashAsCurrencySymbol();
+    return decoder->encoding().backslashAsCurrencySymbol();
 }
 
 void RenderObject::imageChanged(CachedImage *image)
index 439795c..be41812 100644 (file)
@@ -35,7 +35,7 @@
 #include <kio/job.h>
 #include <kjs/protect.h>
 #include <qregexp.h>
-#include <qtextcodec.h>
+#include "TextEncoding.h"
 
 using namespace KIO;
 
@@ -251,11 +251,11 @@ void XMLHttpRequest::send(const DOMString& _body)
       if (charset.isEmpty())
         charset = "UTF-8";
       
-      QTextCodec *codec = QTextCodec::codecForName(charset.latin1());
-      if (!codec)   // FIXME: report an error?
-        codec = QTextCodec::codecForName("UTF-8");
+      TextEncoding encoding = TextEncoding(charset.latin1());
+      if (!encoding.isValid())   // FIXME: report an error?
+        encoding = TextEncoding(UTF8Encoding);
 
-      job = new TransferJob(async ? this : 0, url, codec->fromUnicode(_body.qstring()));
+      job = new TransferJob(async ? this : 0, url, encoding.fromUnicode(_body.qstring()));
   }
   else
      job = new TransferJob(async ? this : 0, url);
@@ -500,10 +500,10 @@ void XMLHttpRequest::receivedData(TransferJob*, const char *data, int len)
     
     decoder = new Decoder;
     if (!encoding.isEmpty())
-      decoder->setEncoding(encoding.latin1(), Decoder::EncodingFromHTTPHeader);
+      decoder->setEncodingName(encoding.latin1(), Decoder::EncodingFromHTTPHeader);
     else
       // only allow Decoder to look inside the response if it's XML
-      decoder->setEncoding("UTF-8", responseIsXML() ? Decoder::DefaultEncoding : Decoder::EncodingFromHTTPHeader);
+      decoder->setEncodingName("UTF-8", responseIsXML() ? Decoder::DefaultEncoding : Decoder::EncodingFromHTTPHeader);
   }
   if (len == 0)
     return;