+2006-09-05 Darin Adler <darin@apple.com>
+
+ Reviewed by Alexey.
+
+ - test for bug where the charset in a link element for a CSS stylesheet is ignored.
+ I noticed this while working on new text encoding machinery.
+
+ * fast/encoding/css-link-charset-expected.txt: Added.
+ * fast/encoding/css-link-charset.css: Added.
+ * fast/encoding/css-link-charset.html: Added.
+
+ - test for http://bugzilla.opendarwin.org/show_bug.cgi?id=10681
+ REGRESSION: Reproducible crash at Wikipedia
+ (Alexey wrote this one, I reviewed.)
+
+ * fast/forms/form-data-encoding-normalization-overrun-expected.txt: Added.
+ * fast/forms/form-data-encoding-normalization-overrun.html: Added.
+
+ - and a tweak to an existing test
+
+ * fast/forms/form-data-encoding.html: Changed to dump encoded URL so it's easier to
+ diagnose this when it fails.
+
2006-09-05 Justin Garcia <justin.garcia@apple.com>
Reviewed by thatcher
--- /dev/null
+Test to see if the charset property in a link works properly.
+
+SUCCESS
+
+
--- /dev/null
+#dummy:before { content: "SUССЕSS"; }
--- /dev/null
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
+ <link rel="stylesheet" type="text/css" href="css-link-charset.css" charset="utf-8">
+</head>
+<body onload="test()">
+<p>Test to see if the charset property in a link works properly.</p>
+<p id="result"></p>
+<script>
+function test() { // onload handler: inspects the linked stylesheet's text and writes SUCCESS or FAILURE into the element referenced as `result`
+ if (window.layoutTestController) // guard: the controller object may not exist in this window
+ layoutTestController.dumpAsText();
+ try {
+ text = document.styleSheets[0].rules[0].style.cssText; // style text of the first rule of the first stylesheet; `text` is assigned without a declaration
+ if (text.match('.*SUÑÑÅSS.*')) // keep this pattern byte-identical; NOTE(review): presumably these bytes read as Cyrillic letters under this page's windows-1251 charset - confirm
+ result.innerHTML = "SUCCESS";
+ else
+ result.innerHTML = "FAILURE: " + text; // echo the text that was actually read so a failure can be diagnosed
+ } catch (ex) {
+ result.innerHTML = "FAILURE: " + ex; // any exception raised while reading the stylesheet is reported as a failure
+ }
+}
+</script>
+</body>
+</html>
--- /dev/null
+Test for bug 10681: REGRESSION: Reproducible crash at Wikipedia
+
+
+SUCCESS
+
+
--- /dev/null
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
+<title>Charsets and submitting forms</title>
+</head>
+<body>
+<p>Test for <a href="http://bugzilla.opendarwin.org/show_bug.cgi?id=10681">bug 10681</a>:
+REGRESSION: Reproducible crash at Wikipedia</p>
+
+<form method="post" action="?" name=f>
+ <input type=hidden name=q value="é"><br>
+</form>
+<script>
+if (window.layoutTestController) { // guard: the controller object may not exist in this window
+ layoutTestController.dumpAsText();
+ layoutTestController.waitUntilDone(); // NOTE(review): presumably paired with the notifyDone() call in the post-submit branch below - confirm
+}
+
+if (document.URL.substring(0, 4) == "file") { // the test body only runs when the page URL starts with "file"
+
+ if (document.URL.indexOf('?') == -1) { // no '?' in the URL yet: this is the load before the form has been submitted
+ for (i=0; i < 15; ++i) { // 15 doublings make the field value 2^15 times its initial length; `i` is assigned without a declaration
+ document.f.q.value += document.f.q.value;
+ }
+ document.f.submit(); // form "f" posts to action "?", so presumably the reloaded URL contains '?' and takes the else branch - confirm
+
+ } else {
+ <!-- no crash == success -->
+ document.write("<p>SUCCESS</p>");
+
+ if (window.layoutTestController)
+ layoutTestController.notifyDone();
+ }
+
+} else {
+
+ document.write("<p>This test doesn't work directly from bugzilla, please save it to a local file first.</p>"); // shown whenever the URL does not start with "file"
+}
+</script>
+</body>
+</html>
if (unescape(document.URL.substring(document.URL.indexOf('?')+1, document.URL.length)) == unescape("q=%26%231105%3B%26%2366560%3B%26%23119083%3B%E7%E7"))
document.write("<p>Success</p>");
else
- document.write("<p>Failure</p>");
+ document.write("<p>Failure, query is " + document.URL.substring(document.URL.indexOf('?')+1, document.URL.length) + "</p>");
if (window.layoutTestController)
layoutTestController.notifyDone();
+2006-09-05 Darin Adler <darin@apple.com>
+
+ Reviewed by Alexey.
+
+ - http://bugzilla.opendarwin.org/show_bug.cgi?id=10728
+ text encodings should work without a numeric ID
+
+ - includes a fix for http://bugzilla.opendarwin.org/show_bug.cgi?id=10681
+ REGRESSION: Reproducible crash at Wikipedia
+
+ - fixed a bug where link elements would not set the charset properly for
+ CSS stylesheets they loaded
+
+ - converted DeprecatedString code paths that are related to decoding web
+ pages to use String instead, to ensure that conversion back and forth won't
+ hurt performance
+
+ Test: fast/encoding/css-link-charset.html
+ Test: fast/forms/form-data-encoding-normalization-overrun.html
+
+ Coverage for encoding issues is pretty good, so we probably don't need more
+ tests to land this. Our existing tests did find issues with this patch while
+ it was under development. And I suppose it would be nice to have even more tests.
+
+ * platform/TextEncoding.h:
+ * platform/TextEncoding.cpp:
+ (WebCore::addEncodingName): Added. Used to build up the set used by isJapanese.
+ (WebCore::TextEncoding::TextEncoding): Removed boolean "eight bit only" parameter and
+ added an overload for String as well as const char*. Simplified because now the only
+ data member is m_name -- calls the registry's atomicCanonicalTextEncodingName function
+ to make the name canonical (resolve aliases) and atomic (use a single pointer for each
+ encoding name so we can compare and hash efficiently).
+ (WebCore::TextEncoding::decode): Renamed from toUnicode. Just a simple wrapper on top
+ of TextDecoder that can be used when the data to decode is all present at once.
+ (WebCore::TextEncoding::encode): Renamed from fromUnicode. Handles the normalization and
+ then uses the registry to get a codec to handle the rest.
+ (WebCore::TextEncoding::usesVisualOrdering): New implementation that compares with the
+ name of the only encoding that uses visual ordering. We blur the concepts a bit so that
+ we treat the visual ordering and logical ordering variations as two separate encodings.
+ (WebCore::TextEncoding::isJapanese): New implementation that uses a set to efficiently
+ determine if an encoding is Japanese.
+ (WebCore::TextEncoding::backslashAsCurrencySymbol): New implementation that compares
+ with the names of the two encodings that have the strange backslash.
+ (WebCore::TextEncoding::closest8BitEquivalent): Added. Replaces the old "eight bit only"
+ boolean parameter to the constructor.
+ (WebCore::ASCIIEncoding): Added.
+ (WebCore::Latin1Encoding): Added.
+ (WebCore::UTF16BigEndianEncoding): Added.
+ (WebCore::UTF16LittleEndianEncoding): Added.
+ (WebCore::UTF8Encoding): Added.
+ (WebCore::WindowsLatin1Encoding): Added.
+
+ * platform/TextEncodingRegistry.h: Added.
+ * platform/TextEncodingRegistry.cpp: Added. Keeps a table of all the character set
+ aliases and names and another of all the codecs and parameters for each name.
+
+ * platform/TextDecoder.h: Added.
+ * platform/TextDecoder.cpp: Added. Contains logic to look for a BOM and hand the data
+ to the proper codec, based on code that used to be in both the ICU and Mac codecs.
+
+ * platform/StreamingTextDecoder.h: Renamed class to TextCodec. We'll rename
+ the files in a later check-in. Moved creation functions into TextEncodingRegistry.h.
+ Added typedefs of the registrar function types so classes derived from this one
+ can use them without including the TextEncodingRegistry header. Renamed toUnicode
+ and fromUnicode to decode and encode. Changed the parameter and return types so that
+ the parameters are pointers to the data and the return types are String and CString.
+ * platform/StreamingTextDecoder.cpp:
+ (WebCore::TextCodec::appendOmittingBOM): Added. Helper function used by multiple
+ classes derived from this one.
+
+ * platform/TextCodecLatin1.h: Added.
+ * platform/TextCodecLatin1.cpp: Added. Contains logic to handle encoding and decoding
+ Windows Latin-1, based on code that used to be in both the ICU and Mac codecs.
+
+ * platform/TextCodecUTF16.h: Added.
+ * platform/TextCodecUTF16.cpp: Added. Contains logic to handle encoding and decoding
+ UTF-16, based on code that used to be in both the ICU and Mac codecs.
+
+ * platform/StreamingTextDecoderICU.h: Renamed class to TextCodecICU. We'll rename
+ the files in a later check-in. Removed all the functions having to do with handling
+ BOM, UTF-16, and Latin-1; those are now handled elsewhere. Removed textEncodingSupported
+ because that's superseded by the registry. Added registry hook functions.
+ * platform/StreamingTextDecoderICU.cpp:
+ (WebCore::TextCodecICU::registerEncodingNames): Added. Registers all encodings that
+ ICU can handle with the "IANA" standard. Also includes a special case for a particular
+ type of encoding for Hebrew that uses logical ordering. Also includes aliases that are
+ not in ICU but that were historically known to WebKit for encodings that ICU handles.
+ (WebCore::newTextCodecICU): Added. Used by registerCodecs.
+ (WebCore::TextCodecICU::registerCodecs): Added. Registers codecs for the same encodings
+ as above.
+ (WebCore::TextCodecICU::TextCodecICU): Much simplified since this now only handles the
+ actual ICU encoding and decoding.
+ (WebCore::TextCodecICU::~TextCodecICU): Renamed.
+ (WebCore::TextCodecICU::releaseICUConverter): Changed to be a const member function.
+ (WebCore::TextCodecICU::createICUConverter): Changed to be a const member function and
+ to check if the cached converter can be reused in a simpler way.
+ (WebCore::TextCodecICU::decode): Updated for changes to types.
+ (WebCore::TextCodecICU::encode): Updated for changes to types, and removed normalization
+ since this is now handled by the caller.
+
+ * platform/mac/StreamingTextDecoderMac.h: Renamed class to TextCodecMac. We'll rename
+ the files in a later check-in. Removed all the functions having to do with handling
+ BOM, UTF-16, and Latin-1; those are now handled elsewhere. Removed textEncodingSupported
+ because that's superseded by the registry. Added registry hook functions.
+ * platform/mac/StreamingTextDecoderMac.cpp:
+ (WebCore::TextCodecMac::registerEncodingNames): Added. Registers encodings based on
+ the charset table generated by the make-charset-table.pl perl script.
+ (WebCore::newTextCodecMac): Added. Used by registerCodecs.
+ (WebCore::TextCodecMac::registerCodecs): Added. Registers codecs for the same encodings
+ as above.
+ (WebCore::TextCodecMac::TextCodecMac): Much simplified since this now only handles the
+ actual TEC/CF encoding and decoding.
+ (WebCore::TextCodecMac::~TextCodecMac): Renamed.
+ (WebCore::TextCodecMac::releaseTECConverter): Changed to be a const member function.
+ (WebCore::TextCodecMac::createTECConverter): Changed to be a const member function.
+ (WebCore::TextCodecMac::decode): Updated for changes to types.
+ (WebCore::TextCodecMac::encode): Updated for changes to types, and removed normalization
+ since this is now handled by the caller.
+
+ * platform/mac/mac-encodings.txt: Removed most of the names in this file. This now
+ only includes encodings where we want to use the Mac OS X Carbon Text Encoding Converter,
+ that is, only the encodings that are not supported by ICU.
+ * platform/make-charset-table.pl: Removed flags from output. We don't use them any more.
+ * platform/CharsetData.h: Changed from a platform-independent header into a
+ Macintosh-specific one. A later patch should move this and rename it. Also
+ subsumes ExtraCFEncodings.h.
+
+ * WebCore.xcodeproj/project.pbxproj: Added new files. Changed the prefix on the
+ "make character sets" rule to be kTextEncoding instead of kCFStringEncoding.
+
+ * loader/Decoder.h: Changed the default encoding parameter to the constructor to be
+ a TextEncoding object. Renamed setEncodingName to setEncoding, and made it take a
+ TextEncoding for the encoding. Removed the encodingName and visuallyOrdered functions,
+ since TextEncoding supports both directly in a straightforward way. Changed both
+ decode and flush functions to return String instead of DeprecatedString. Added a
+ number of private functions to factor this class a bit more so it's easier to read.
+ Got rid of a number of redundant data members. Changed the buffer to a Vector<char>.
+ * loader/Decoder.cpp:
+ (WebCore::Decoder::determineContentType): Added. Used by constructor to determine
+ the content type based on the passed-in MIME type.
+ (WebCore::Decoder::defaultEncoding): Added. Used by constructor to determine the
+ default encoding based on the passed-in default and the content type.
+ (WebCore::Decoder::Decoder): Changed to use the functions above. Also renamed
+ m_reachedBody to m_checkedForHeadCharset.
+ (WebCore::Decoder::setEncoding): Renamed and changed to take an encoding rather
+ than an encoding name.
+ (WebCore::Decoder::checkForBOM): Factored out of decode.
+ (WebCore::Decoder::checkForCSSCharset): Factored out of decode.
+ (WebCore::Decoder::checkForHeadCharset): Factored out of decode.
+ (WebCore::Decoder::detectJapaneseEncoding): Factored out of decode.
+ (WebCore::Decoder::decode): Refactored so it's no longer one huge function.
+ Changed to use the new Vector<char> and the new API for TextDecoder.
+ (WebCore::Decoder::flush): Added code to empty out the buffer. Not an issue in
+ practice since we don't re-use the decoder after flushing it.
+
+ * platform/UChar.h: Added. Has the type named WebCore::UChar that we'll be switching
+ to. We'll switch away from the ICU ::UChar type, because we don't want to be so
+ closely tied to ICU -- include this instead of <unicode/umachine.h>.
+
+ * platform/PlatformString.h:
+ * platform/String.cpp:
+ (WebCore::String::latin1): Updated for changes to TextEncoding.
+ (WebCore::String::utf8): Ditto.
+ (WebCore::String::newUninitialized): Added. Gives a way to create a String and
+ then write directly into its buffer.
+
+ * platform/StringImpl.h: Changed return value for charactersWithNullTermination to
+ be a const UChar*. While it's true that this function changes the underlying
+ StringImpl, the characters still shouldn't be modified with the returned pointer.
+ * platform/StringImpl.cpp:
+ (WebCore::StringImpl::charactersWithNullTermination): Updated for change above.
+ (WebCore::StringImpl::newUninitialized): Added. Gives a way to create a StringImpl
+ and then write directly into its buffer.
+
+ * platform/CString.h:
+ * platform/CString.cpp: (WebCore::CString::newUninitialized): Added. Gives a way
+ to create a CString and then write directly into its buffer.
+
+ * bridge/mac/WebCoreFrameBridge.h: Removed textEncoding method, and replaced
+ +[WebCoreFrameBridge stringWithData:textEncoding:] with
+ -[WebCoreFrameBridge stringWithData:] to avoid having to pass text encoding
+ IDs around.
+ * bridge/mac/WebCoreFrameBridge.mm:
+ (-[WebCoreFrameBridge setEncoding:userChosen:]): Removed now-unneeded conversion
+ to DeprecatedString.
+ (-[WebCoreFrameBridge stringByEvaluatingJavaScriptFromString:forceUserGesture:]):
+ Ditto.
+ (-[WebCoreFrameBridge aeDescByEvaluatingJavaScriptFromString:]): Ditto.
+ (-[WebCoreFrameBridge referrer]): Removed now-unneeded call to getNSString.
+ (-[WebCoreFrameBridge stringWithData:]): Added. Asks the document's decoder
+ what its encoding is, and decodes using that.
+ (+[WebCoreFrameBridge stringWithData:textEncodingName:]): Simplified so it
+ no longer involves a text encoding ID number.
+ (-[WebCoreFrameBridge smartInsertForString:replacingRange:beforeString:afterString:]):
+ Changed to use UChar instead of DeprecatedChar.
+ (-[WebCoreFrameBridge documentFragmentWithMarkupString:baseURLString:]): Removed
+ now-unneeded conversion to DeprecatedString.
+ (-[WebCoreFrameBridge documentFragmentWithText:inContext:]): Ditto.
+
+ * html/HTMLFormElement.cpp:
+ (WebCore::encodeCString): Changed parameter to CString.
+ (WebCore::HTMLFormElement::formData): Updated code for improvements to TextEncoding.
+
+ * loader/CachedCSSStyleSheet.h:
+ * loader/CachedCSSStyleSheet.cpp:
+ (WebCore::CachedCSSStyleSheet::CachedCSSStyleSheet): Fixed mistake where the
+ decoder was created without passing in the character set. Also changed from
+ DeprecatedString to String.
+ (WebCore::CachedCSSStyleSheet::setCharset): More of the same.
+
+ * bindings/js/kjs_window.h: (KJS::ScheduledAction::ScheduledAction): Changed
+ to use String instead of DeprecatedString, UChar instead of DeprecatedChar,
+ CString instead of DeprecatedCString, etc.
+ * bridge/mac/FormDataMac.mm: (WebCore::arrayFromFormData): Ditto.
+ * bridge/mac/FrameMac.h: Ditto.
+ * bridge/mac/FrameMac.mm: (WebCore::FrameMac::isCharacterSmartReplaceExempt):
+ Ditto.
+ * bridge/mac/WebCoreAXObject.mm:
+ (-[WebCoreAXObject helpText]): Ditto.
+ (-[WebCoreAXObject value]): Ditto.
+ (-[WebCoreAXObject accessibilityDescription]): Ditto.
+ (-[WebCoreAXObject doAXStringForTextMarkerRange:]): Ditto.
+ * bridge/mac/WebCoreEncodings.mm: (+[WebCoreEncodings decodeData:]): Ditto.
+ Also fixed code that does a deref without a ref to use RefPtr instead.
+ * bridge/mac/WebCoreScriptDebugger.mm:
+ (-[WebCoreScriptCallFrame evaluateWebScript:]): Ditto.
+ * bridge/mac/WebCoreSettings.mm:
+ (-[WebCoreSettings setDefaultTextEncoding:]): Ditto.
+ * css/CSSImportRule.cpp: (WebCore::CSSImportRule::insertedIntoParent): Ditto.
+ * css/cssparser.cpp: (WebCore::CSSParser::lex): Ditto.
+ * dom/Document.h:
+ * dom/Document.cpp:
+ (WebCore::Document::setCharset): Ditto.
+ (WebCore::Document::write): Ditto.
+ (WebCore::Document::determineParseMode): Ditto.
+ * dom/ProcessingInstruction.cpp:
+ (WebCore::ProcessingInstruction::checkStyleSheet): Ditto.
+ * dom/XMLTokenizer.h:
+ * dom/XMLTokenizer.cpp:
+ (WebCore::shouldAllowExternalLoad): Ditto.
+ (WebCore::createStringParser): Ditto.
+ (WebCore::XMLTokenizer::write): Ditto.
+ (WebCore::toString): Ditto.
+ (WebCore::handleElementAttributes): Ditto.
+ (WebCore::XMLTokenizer::startElementNs): Ditto.
+ (WebCore::XMLTokenizer::endElementNs): Ditto.
+ (WebCore::XMLTokenizer::characters): Ditto.
+ (WebCore::XMLTokenizer::processingInstruction): Ditto.
+ (WebCore::XMLTokenizer::cdataBlock): Ditto.
+ (WebCore::XMLTokenizer::comment): Ditto.
+ (WebCore::XMLTokenizer::internalSubset): Ditto.
+ (WebCore::getXHTMLEntity): Ditto.
+ (WebCore::externalSubsetHandler): Ditto.
+ (WebCore::XMLTokenizer::initializeParserContext): Ditto.
+ (WebCore::XMLTokenizer::notifyFinished): Ditto.
+ (WebCore::xmlDocPtrForString): Ditto.
+ (WebCore::parseXMLDocumentFragment): Ditto.
+ (WebCore::attributesStartElementNsHandler): Ditto.
+ (WebCore::parseAttributes): Ditto.
+ * html/FormDataList.h:
+ * html/FormDataList.cpp:
+ (WebCore::FormDataList::appendString): Ditto. Also changed to call the
+ encoding function by its new name and with new parameters.
+ (WebCore::FormDataList::appendFile): Ditto.
+ * html/HTMLDocument.h:
+ * html/HTMLDocument.cpp:
+ (WebCore::parseDocTypePart): Ditto.
+ (WebCore::containsString): Ditto.
+ (WebCore::parseDocTypeDeclaration): Ditto.
+ (WebCore::HTMLDocument::determineParseMode): Ditto.
+ * html/HTMLInputElement.cpp: (WebCore::HTMLInputElement::appendFormData): Ditto.
+ * html/HTMLScriptElement.cpp:
+ (WebCore::HTMLScriptElement::parseMappedAttribute): Ditto.
+ * html/HTMLTokenizer.h:
+ * html/HTMLTokenizer.cpp:
+ (WebCore::HTMLTokenizer::scriptHandler): Ditto.
+ (WebCore::HTMLTokenizer::parseTag): Ditto.
+ (WebCore::HTMLTokenizer::write): Ditto.
+ (WebCore::HTMLTokenizer::finish): Ditto.
+ (WebCore::parseHTMLDocumentFragment): Ditto.
+ * loader/Cache.h:
+ * loader/Cache.cpp:
+ (WebCore::Cache::requestStyleSheet): Ditto.
+ (WebCore::Cache::requestScript): Ditto.
+ * loader/CachedResource.h: Ditto.
+ * loader/CachedScript.h:
+ * loader/CachedScript.cpp:
+ (WebCore::CachedScript::CachedScript): Ditto.
+ (WebCore::CachedScript::ref): Ditto.
+ (WebCore::CachedScript::deref): Ditto.
+ (WebCore::CachedScript::setCharset): Ditto.
+ (WebCore::CachedScript::data): Ditto.
+ (WebCore::CachedScript::checkNotify): Ditto.
+ * loader/CachedXBLDocument.h:
+ * loader/CachedXBLDocument.cpp:
+ (WebCore::CachedXBLDocument::setCharset): Ditto.
+ * loader/CachedXSLStyleSheet.h:
+ * loader/CachedXSLStyleSheet.cpp:
+ (WebCore::CachedXSLStyleSheet::setCharset): Ditto.
+ * loader/DocLoader.cpp:
+ (WebCore::DocLoader::requestStyleSheet): Ditto.
+ (WebCore::DocLoader::requestScript): Ditto.
+ * loader/DocLoader.h: Ditto.
+ * loader/FormData.h:
+ * loader/FormData.cpp:
+ (WebCore::FormData::FormData): Ditto.
+ (WebCore::FormData::appendFile): Ditto.
+ (WebCore::FormData::flattenToString): Ditto.
+ * page/Frame.h:
+ * page/FramePrivate.h:
+ * page/Frame.cpp:
+ (WebCore::UserStyleSheetLoader::setStyleSheet): Ditto.
+ (WebCore::getString): Ditto.
+ (WebCore::Frame::replaceContentsWithScriptResult): Ditto.
+ (WebCore::Frame::executeScript): Ditto.
+ (WebCore::Frame::clear): Ditto.
+ (WebCore::Frame::write): Ditto.
+ (WebCore::Frame::endIfNotLoading): Ditto.
+ (WebCore::Frame::baseTarget): Ditto.
+ (WebCore::Frame::scheduleRedirection): Ditto.
+ (WebCore::Frame::scheduleLocationChange): Ditto.
+ (WebCore::Frame::scheduleHistoryNavigation): Ditto.
+ (WebCore::Frame::changeLocation): Ditto.
+ (WebCore::Frame::redirectionTimerFired): Ditto.
+ (WebCore::Frame::encoding): Ditto.
+ (WebCore::Frame::submitForm): Ditto.
+ (WebCore::Frame::referrer): Ditto.
+ (WebCore::Frame::isCharacterSmartReplaceExempt): Ditto.
+ (WebCore::Frame::setEncoding): Ditto.
+ * page/Settings.h: Ditto.
+ * platform/SegmentedString.h: Ditto.
+ * platform/SegmentedString.cpp: Ditto.
+ * xml/XSLStyleSheet.cpp: (WebCore::XSLStyleSheet::parseString): Ditto.
+ * xml/XSLTProcessor.cpp:
+ (WebCore::transformTextStringToXHTMLDocumentString): Ditto.
+ (WebCore::XSLTProcessor::createDocumentFromSource): Ditto.
+ * xml/xmlhttprequest.h:
+ * xml/xmlhttprequest.cpp:
+ (WebCore::XMLHttpRequest::open): Ditto.
+ (WebCore::XMLHttpRequest::send): Ditto.
+ (WebCore::XMLHttpRequest::receivedData): Ditto.
+
+ * platform/DeprecatedString.cpp:
+ (WebCore::DeprecatedString::fromUtf8): Updated for changes to TextEncoding.
+ (WebCore::DeprecatedString::utf8): Ditto.
+
+ * platform/KURL.h:
+ * platform/KURL.cpp:
+ (WebCore::KURL::KURL): Updated to overload based on presence or absence of
+ TextEncoding rather than having a default.
+ (WebCore::KURL::init): Moved body of constructor in here. Updated to use
+ the new TextEncoding interface.
+ (WebCore::KURL::decode_string): Updated to overload based on presence or
+ absence of TextEncoding rather than having a default. Updated to use
+ the new TextEncoding interface.
+ (WebCore::encodeRelativeString): Updated to use the new TextEncoding interface.
+
+ * platform/Font.cpp: (WebCore::WidthIterator::normalizeVoicingMarks): Fixed
+ code to use U_ZERO_ERROR instead of a typecast.
+
+ * bindings/js/kjs_proxy.h: Removed unneeded declaration of DeprecatedString.
+ * platform/GraphicsContext.h: Ditto.
+
+ * platform/GraphicsContext.cpp: Removed unneeded include of "DeprecatedString.h".
+ * rendering/break_lines.cpp: Ditto.
+ * xml/XMLSerializer.cpp: Ditto.
+
+ * platform/mac/FontDataMac.mm: Removed unneeded include of <unicode/unorm.h>.
+
+ * platform/CharsetNames.h: Emptied out this file. A later patch could remove it.
+ * platform/CharsetNames.cpp: Ditto.
+ * platform/mac/ExtraCFEncodings.h: Ditto.
+
2006-09-05 Nikolas Zimmermann <zimmermann@kde.org>
Reviewed by eseidel. Landed by eseidel.
<Filter
Name="DerivedSources"
>
- <File
- RelativePath="..\..\DerivedSources\WebCore\CharsetData.cpp"
- >
- </File>
<File
RelativePath="..\..\DerivedSources\WebCore\ColorData.c"
>
RelativePath="..\..\platform\AtomicStringImpl.h"
>
</File>
- <File
- RelativePath="..\..\platform\CharsetNames.cpp"
- >
- </File>
- <File
- RelativePath="..\..\platform\CharsetNames.h"
- >
- </File>
<File
RelativePath="..\..\platform\Color.cpp"
>
RelativePath="..\..\platform\TextBoundaries.h"
>
</File>
+ <File
+ RelativePath="..\..\platform\TextCodecLatin1.cpp"
+ >
+ </File>
+ <File
+ RelativePath="..\..\platform\TextCodecLatin1.h"
+ >
+ </File>
+ <File
+ RelativePath="..\..\platform\TextCodecUTF16.cpp"
+ >
+ </File>
+ <File
+ RelativePath="..\..\platform\TextCodecUTF16.h"
+ >
+ </File>
+ <File
+ RelativePath="..\..\platform\TextDecoder.cpp"
+ >
+ </File>
+ <File
+ RelativePath="..\..\platform\TextDecoder.h"
+ >
+ </File>
<File
RelativePath="..\..\platform\TextEncoding.cpp"
>
RelativePath="..\..\platform\TextEncoding.h"
>
</File>
+ <File
+ RelativePath="..\..\platform\TextEncodingRegistry.cpp"
+ >
+ </File>
+ <File
+ RelativePath="..\..\platform\TextEncodingRegistry.h"
+ >
+ </File>
<File
RelativePath="..\..\platform\TextField.h"
>
65DF323D09D1DE65000BE325 /* JSCanvasRenderingContext2D.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 65DF323709D1DE65000BE325 /* JSCanvasRenderingContext2D.cpp */; };
65DF323E09D1DE65000BE325 /* JSCanvasRenderingContext2D.h in Headers */ = {isa = PBXBuildFile; fileRef = 65DF323809D1DE65000BE325 /* JSCanvasRenderingContext2D.h */; };
65DF326109D1E199000BE325 /* UserAgentStyleSheetsData.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 656581AF09D14EE6000E61D7 /* UserAgentStyleSheetsData.cpp */; };
- 65F537D909B2B20F00F3DC4A /* CharsetNames.h in Headers */ = {isa = PBXBuildFile; fileRef = 65F537D809B2B20F00F3DC4A /* CharsetNames.h */; };
65F5386909B2BFDA00F3DC4A /* ExtraCFEncodings.h in Headers */ = {isa = PBXBuildFile; fileRef = 65F5386809B2BFDA00F3DC4A /* ExtraCFEncodings.h */; };
- 65F5386B09B2C05E00F3DC4A /* CharsetNames.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 65F5386A09B2C05E00F3DC4A /* CharsetNames.cpp */; };
65FEA86909833ADE00BED4AB /* Page.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 65FEA86809833ADE00BED4AB /* Page.cpp */; };
7E6FEED80898582300C44C3F /* WebCoreScriptDebugger.h in Headers */ = {isa = PBXBuildFile; fileRef = 7E6FEED60898582300C44C3F /* WebCoreScriptDebugger.h */; settings = {ATTRIBUTES = (Private, ); }; };
7E6FEED90898582300C44C3F /* WebCoreScriptDebugger.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7E6FEED70898582300C44C3F /* WebCoreScriptDebugger.mm */; };
9326DC0B09DAD5BE00AFC847 /* CharsetData.h in Headers */ = {isa = PBXBuildFile; fileRef = 9326DC0A09DAD5BE00AFC847 /* CharsetData.h */; };
9326DC0C09DAD5D600AFC847 /* CharsetData.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 656581AC09D14EE6000E61D7 /* CharsetData.cpp */; };
9327A94209968D1A0068A546 /* HTMLOptionsCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9327A94109968D1A0068A546 /* HTMLOptionsCollection.cpp */; };
+ 932CA7650AAA1DF500AD1FAD /* TextDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 932CA7640AAA1DF500AD1FAD /* TextDecoder.cpp */; };
+ 932CA7C10AAA20C100AD1FAD /* TextCodecLatin1.h in Headers */ = {isa = PBXBuildFile; fileRef = 932CA7C00AAA20C100AD1FAD /* TextCodecLatin1.h */; };
+ 932CA7C50AAA20D200AD1FAD /* TextCodecLatin1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 932CA7C40AAA20D200AD1FAD /* TextCodecLatin1.cpp */; };
+ 932CA83B0AAA667F00AD1FAD /* TextCodecUTF16.h in Headers */ = {isa = PBXBuildFile; fileRef = 932CA83A0AAA667F00AD1FAD /* TextCodecUTF16.h */; };
+ 932CA8480AAA66CB00AD1FAD /* TextCodecUTF16.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 932CA8470AAA66CB00AD1FAD /* TextCodecUTF16.cpp */; };
93309DD6099E64920056E581 /* AppendNodeCommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93309D87099E64910056E581 /* AppendNodeCommand.cpp */; };
93309DD7099E64920056E581 /* AppendNodeCommand.h in Headers */ = {isa = PBXBuildFile; fileRef = 93309D88099E64910056E581 /* AppendNodeCommand.h */; };
93309DD8099E64920056E581 /* ApplyStyleCommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93309D89099E64910056E581 /* ApplyStyleCommand.cpp */; };
93309EA2099EB78C0056E581 /* SharedTimerMac.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93309E9F099EB78C0056E581 /* SharedTimerMac.cpp */; };
93309EA3099EB78C0056E581 /* SharedTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = 93309EA0099EB78C0056E581 /* SharedTimer.h */; };
93309EA4099EB78C0056E581 /* Timer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93309EA1099EB78C0056E581 /* Timer.cpp */; };
+ 934706AB0AACD809002C1D43 /* TextDecoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 932CA7420AAA198E00AD1FAD /* TextDecoder.h */; };
9352071909BD3BA500F2038D /* StaticConstructors.h in Headers */ = {isa = PBXBuildFile; fileRef = 9352071709BD3BA500F2038D /* StaticConstructors.h */; };
9352071A09BD3BA500F2038D /* TextBoundaries.h in Headers */ = {isa = PBXBuildFile; fileRef = 9352071809BD3BA500F2038D /* TextBoundaries.h */; };
9352071D09BD3BBB00F2038D /* TextBoundaries.mm in Sources */ = {isa = PBXBuildFile; fileRef = 9352071B09BD3BBB00F2038D /* TextBoundaries.mm */; };
939885C408B7E3D100E707C4 /* EventNames.h in Headers */ = {isa = PBXBuildFile; fileRef = 939885C208B7E3D100E707C4 /* EventNames.h */; };
93A1EAA00A5634C9006960A0 /* ImageDocumentMac.mm in Sources */ = {isa = PBXBuildFile; fileRef = 93A1EA9F0A5634C9006960A0 /* ImageDocumentMac.mm */; };
93A1EAA80A563508006960A0 /* ImageDocumentMac.h in Headers */ = {isa = PBXBuildFile; fileRef = 93A1EAA70A563508006960A0 /* ImageDocumentMac.h */; };
+ 93AE23180AAB51E500CD8A35 /* TextEncodingRegistry.h in Headers */ = {isa = PBXBuildFile; fileRef = 93AE23170AAB51E500CD8A35 /* TextEncodingRegistry.h */; };
+ 93AE23210AAB521B00CD8A35 /* TextEncodingRegistry.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93AE23200AAB521B00CD8A35 /* TextEncodingRegistry.cpp */; };
93B0FD8A0A759BED0080AD44 /* StringImplMac.mm in Sources */ = {isa = PBXBuildFile; fileRef = 93B0FD880A759BED0080AD44 /* StringImplMac.mm */; };
93B0FD8B0A759BED0080AD44 /* StringMac.mm in Sources */ = {isa = PBXBuildFile; fileRef = 93B0FD890A759BED0080AD44 /* StringMac.mm */; };
93B70D5D09EB0C7C009D8468 /* JSXMLHttpRequest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93B70D4109EB0C7C009D8468 /* JSXMLHttpRequest.cpp */; };
65DF323609D1DE65000BE325 /* JSCanvasPattern.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = JSCanvasPattern.h; sourceTree = "<group>"; };
65DF323709D1DE65000BE325 /* JSCanvasRenderingContext2D.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = JSCanvasRenderingContext2D.cpp; sourceTree = "<group>"; };
65DF323809D1DE65000BE325 /* JSCanvasRenderingContext2D.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = JSCanvasRenderingContext2D.h; sourceTree = "<group>"; };
- 65F537D809B2B20F00F3DC4A /* CharsetNames.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = CharsetNames.h; sourceTree = "<group>"; };
65F5382009B2B55700F3DC4A /* character-sets.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = "character-sets.txt"; sourceTree = "<group>"; };
65F5382109B2B55700F3DC4A /* make-charset-table.pl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text.script.perl; path = "make-charset-table.pl"; sourceTree = "<group>"; };
65F5382409B2B57500F3DC4A /* mac-encodings.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = "mac-encodings.txt"; sourceTree = "<group>"; };
65F5386809B2BFDA00F3DC4A /* ExtraCFEncodings.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = ExtraCFEncodings.h; sourceTree = "<group>"; };
- 65F5386A09B2C05E00F3DC4A /* CharsetNames.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = CharsetNames.cpp; sourceTree = "<group>"; };
65F80697054D9F86008BF776 /* BlockExceptions.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = BlockExceptions.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
65FEA86809833ADE00BED4AB /* Page.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = Page.cpp; sourceTree = "<group>"; };
7E6FEED60898582300C44C3F /* WebCoreScriptDebugger.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = WebCoreScriptDebugger.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
93126F6009D7A736008D9626 /* StringHash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = StringHash.h; sourceTree = "<group>"; };
9326DC0A09DAD5BE00AFC847 /* CharsetData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CharsetData.h; sourceTree = "<group>"; };
9327A94109968D1A0068A546 /* HTMLOptionsCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HTMLOptionsCollection.cpp; sourceTree = "<group>"; };
+ 932CA7420AAA198E00AD1FAD /* TextDecoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TextDecoder.h; sourceTree = "<group>"; };
+ 932CA7640AAA1DF500AD1FAD /* TextDecoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TextDecoder.cpp; sourceTree = "<group>"; };
+ 932CA7C00AAA20C100AD1FAD /* TextCodecLatin1.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TextCodecLatin1.h; sourceTree = "<group>"; };
+ 932CA7C40AAA20D200AD1FAD /* TextCodecLatin1.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TextCodecLatin1.cpp; sourceTree = "<group>"; };
+ 932CA83A0AAA667F00AD1FAD /* TextCodecUTF16.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TextCodecUTF16.h; sourceTree = "<group>"; };
+ 932CA8470AAA66CB00AD1FAD /* TextCodecUTF16.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TextCodecUTF16.cpp; sourceTree = "<group>"; };
93309D87099E64910056E581 /* AppendNodeCommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = AppendNodeCommand.cpp; sourceTree = "<group>"; };
93309D88099E64910056E581 /* AppendNodeCommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppendNodeCommand.h; sourceTree = "<group>"; };
93309D89099E64910056E581 /* ApplyStyleCommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ApplyStyleCommand.cpp; sourceTree = "<group>"; };
93A1EAA70A563508006960A0 /* ImageDocumentMac.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ImageDocumentMac.h; sourceTree = "<group>"; };
93ABCE5D06E1A42E0085925B /* FormData.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FormData.cpp; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
93ABCE5E06E1A42E0085925B /* FormData.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = FormData.h; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
+ 93AE23170AAB51E500CD8A35 /* TextEncodingRegistry.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TextEncodingRegistry.h; sourceTree = "<group>"; };
+ 93AE23200AAB521B00CD8A35 /* TextEncodingRegistry.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TextEncodingRegistry.cpp; sourceTree = "<group>"; };
93B0FD880A759BED0080AD44 /* StringImplMac.mm */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.objcpp; path = StringImplMac.mm; sourceTree = "<group>"; };
93B0FD890A759BED0080AD44 /* StringMac.mm */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.objcpp; path = StringMac.mm; sourceTree = "<group>"; };
93B641F106E28C5C0055F610 /* FormDataMac.mm */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = FormDataMac.mm; sourceTree = "<group>"; tabWidth = 8; usesTabs = 0; };
F5C2869302846DCD018635CA /* ApplicationServices.framework */,
F5C2869402846DCD018635CA /* Carbon.framework */,
F5C2869502846DCD018635CA /* Cocoa.framework */,
- A85D7A2F0879EBA9006A9172 /* QuartzCore.framework */,
F8216299029F4FB501000131 /* JavaScriptCore.framework */,
93F1D31A0558CC5C00821BC0 /* libicucore.dylib */,
+ 1CFAE3220A6D6A3F0032593D /* libobjc.dylib */,
9372560F0A61564200B053B9 /* libsqlite3.dylib */,
DD763BB10992C2C900740B8E /* libxml2.dylib */,
840633070731A77200DB1FD1 /* libxslt.dylib */,
- 1CFAE3220A6D6A3F0032593D /* libobjc.dylib */,
+ A85D7A2F0879EBA9006A9172 /* QuartzCore.framework */,
);
name = Frameworks;
sourceTree = "<group>";
BCF1A5BA097832090061A123 /* platform */ = {
isa = PBXGroup;
children = (
- AA98B87C0AAA02F8001A44C2 /* Path.cpp */,
1AE42F670AA4B8CB00C8612D /* cf */,
93032CCC09AEC36200F82A18 /* cg */,
6582A14809999D6C00BEEB6D /* mac */,
93CD4FD90995F9EA007ECC97 /* AtomicStringImpl.h */,
65F5382009B2B55700F3DC4A /* character-sets.txt */,
9326DC0A09DAD5BE00AFC847 /* CharsetData.h */,
- 65F5386A09B2C05E00F3DC4A /* CharsetNames.cpp */,
- 65F537D809B2B20F00F3DC4A /* CharsetNames.h */,
BCC8CFC80986CD2400140BF2 /* Color.cpp */,
BCC8CFC90986CD2400140BF2 /* Color.h */,
BCC8CFCA0986CD2400140BF2 /* ColorData.gperf */,
A8239DFE09B3CF8A00B60641 /* Logging.cpp */,
A8239DFF09B3CF8A00B60641 /* Logging.h */,
65F5382109B2B55700F3DC4A /* make-charset-table.pl */,
+ AA98B87C0AAA02F8001A44C2 /* Path.cpp */,
93032CC909AEC34B00F82A18 /* Path.h */,
BCC8D1710988301200140BF2 /* Pen.cpp */,
BCC8D1720988301200140BF2 /* Pen.h */,
93CD4FDD0995F9EA007ECC97 /* StringImpl.h */,
93E62D990985F41600E1B5E3 /* SystemTime.h */,
9352071809BD3BA500F2038D /* TextBoundaries.h */,
+ 932CA7C40AAA20D200AD1FAD /* TextCodecLatin1.cpp */,
+ 932CA7C00AAA20C100AD1FAD /* TextCodecLatin1.h */,
+ 932CA8470AAA66CB00AD1FAD /* TextCodecUTF16.cpp */,
+ 932CA83A0AAA667F00AD1FAD /* TextCodecUTF16.h */,
+ 932CA7640AAA1DF500AD1FAD /* TextDecoder.cpp */,
+ 932CA7420AAA198E00AD1FAD /* TextDecoder.h */,
9353686A09AF78F600D35CD6 /* TextDirection.h */,
657BD74B09AFDC54005A2056 /* TextEncoding.cpp */,
657BD74C09AFDC54005A2056 /* TextEncoding.h */,
+ 93AE23200AAB521B00CD8A35 /* TextEncodingRegistry.cpp */,
+ 93AE23170AAB51E500CD8A35 /* TextEncodingRegistry.h */,
F587869802DE3B8601EA4122 /* TextField.h */,
F587853902DE375901EA4122 /* TextStream.cpp */,
F58786BF02DE3B8601EA4122 /* TextStream.h */,
657BD74E09AFDC54005A2056 /* StreamingTextDecoder.h in Headers */,
657BD75009AFDC54005A2056 /* TextEncoding.h in Headers */,
BC6D6E2609AF943500F59759 /* ScrollView.h in Headers */,
- 65F537D909B2B20F00F3DC4A /* CharsetNames.h in Headers */,
65F5386909B2BFDA00F3DC4A /* ExtraCFEncodings.h in Headers */,
93B780CA09B3B7FE00690162 /* WidgetClient.h in Headers */,
A82398A609B3ACDB00B60641 /* PlugInInfoStore.h in Headers */,
852B9E870AA79C47002ADA6E /* DOMHTMLAppletElement.h in Headers */,
852B9E890AA79C47002ADA6E /* DOMHTMLOptionElement.h in Headers */,
1A98955D0AA78149005EF5EF /* CString.h in Headers */,
+ 932CA7C10AAA20C100AD1FAD /* TextCodecLatin1.h in Headers */,
+ 932CA83B0AAA667F00AD1FAD /* TextCodecUTF16.h in Headers */,
85032DD70AA8C9BE007D3B7D /* DOMCSSCharsetRule.h in Headers */,
85032DD90AA8C9BE007D3B7D /* DOMCSSFontFaceRule.h in Headers */,
85032DDB0AA8C9BE007D3B7D /* DOMCSSImportRule.h in Headers */,
858C38A70AA8F20400B187A4 /* DOMRect.h in Headers */,
858C39280AA8FF9D00B187A4 /* DOMCSSStyleSheet.h in Headers */,
858C395F0AA902A900B187A4 /* DOMRGBColor.h in Headers */,
+ 93AE23180AAB51E500CD8A35 /* TextEncodingRegistry.h in Headers */,
850656FE0AAB4763002D15C0 /* DOMMediaList.h in Headers */,
850657000AAB4763002D15C0 /* DOMStyleSheet.h in Headers */,
850657020AAB4763002D15C0 /* DOMStyleSheetList.h in Headers */,
+ 934706AB0AACD809002C1D43 /* TextDecoder.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
- shellScript = "mkdir -p \"${BUILT_PRODUCTS_DIR}/DerivedSources/WebCore\"\ncd \"${BUILT_PRODUCTS_DIR}/DerivedSources/WebCore\"\n\nif [ \"${BUILD_STYLE}\" = \"Release\" -o \"${BUILD_STYLE}\" = \"Debug\" ] ; then\n export CREATE_HASH_TABLE=\"${BUILT_PRODUCTS_DIR}/JavaScriptCore.framework/PrivateHeaders/create_hash_table\"\nelse\n export CREATE_HASH_TABLE=\"${NEXT_ROOT}${SYSTEM_LIBRARY_DIR}/Frameworks/JavaScriptCore.framework/PrivateHeaders/create_hash_table\"\nfi\n\nln -sfh \"${SRCROOT}\" WebCore\nexport WebCore=\"WebCore\"\n\nexport ENCODINGS_FILE=\"WebCore/platform/mac/mac-encodings.txt\"\nexport ENCODINGS_PREFIX=\"kCFStringEncoding\"\n\nif [ \"${ACTION}\" = \"build\" -o \"${ACTION}\" = \"install\" ]; then\n make -f \"WebCore/DerivedSources.make\"\nfi\n";
+ shellScript = "mkdir -p \"${BUILT_PRODUCTS_DIR}/DerivedSources/WebCore\"\ncd \"${BUILT_PRODUCTS_DIR}/DerivedSources/WebCore\"\n\nif [ \"${BUILD_STYLE}\" = \"Release\" -o \"${BUILD_STYLE}\" = \"Debug\" ] ; then\n export CREATE_HASH_TABLE=\"${BUILT_PRODUCTS_DIR}/JavaScriptCore.framework/PrivateHeaders/create_hash_table\"\nelse\n export CREATE_HASH_TABLE=\"${NEXT_ROOT}${SYSTEM_LIBRARY_DIR}/Frameworks/JavaScriptCore.framework/PrivateHeaders/create_hash_table\"\nfi\n\nln -sfh \"${SRCROOT}\" WebCore\nexport WebCore=\"WebCore\"\n\nexport ENCODINGS_FILE=\"WebCore/platform/mac/mac-encodings.txt\"\nexport ENCODINGS_PREFIX=\"kTextEncoding\"\n\nif [ \"${ACTION}\" = \"build\" -o \"${ACTION}\" = \"install\" ]; then\n make -f \"WebCore/DerivedSources.make\"\nfi\n";
};
/* End PBXShellScriptBuildPhase section */
935367E909AF77EF00D35CD6 /* GraphicsContextMac.mm in Sources */,
657BD74D09AFDC54005A2056 /* StreamingTextDecoder.cpp in Sources */,
657BD74F09AFDC54005A2056 /* TextEncoding.cpp in Sources */,
- 65F5386B09B2C05E00F3DC4A /* CharsetNames.cpp in Sources */,
A82398A809B3ACF500B60641 /* PlugInInfoStoreMac.mm in Sources */,
A8239E0009B3CF8A00B60641 /* Logging.cpp in Sources */,
BCEB377309B7BA3900CB38B1 /* FontMac.mm in Sources */,
852B9E8A0AA79C47002ADA6E /* DOMHTMLOptionElement.mm in Sources */,
1A98955C0AA78149005EF5EF /* CString.cpp in Sources */,
1A98956B0AA78F80005EF5EF /* KURLCFNet.cpp in Sources */,
+ 932CA7650AAA1DF500AD1FAD /* TextDecoder.cpp in Sources */,
+ 932CA7C50AAA20D200AD1FAD /* TextCodecLatin1.cpp in Sources */,
+ 932CA8480AAA66CB00AD1FAD /* TextCodecUTF16.cpp in Sources */,
AA98B87D0AAA02F8001A44C2 /* Path.cpp in Sources */,
85032DD80AA8C9BE007D3B7D /* DOMCSSCharsetRule.mm in Sources */,
85032DDA0AA8C9BE007D3B7D /* DOMCSSFontFaceRule.mm in Sources */,
858C38A80AA8F20400B187A4 /* DOMRect.mm in Sources */,
858C39290AA8FF9D00B187A4 /* DOMCSSStyleSheet.mm in Sources */,
858C39600AA902A900B187A4 /* DOMRGBColor.mm in Sources */,
+ 93AE23210AAB521B00CD8A35 /* TextEncodingRegistry.cpp in Sources */,
850656FF0AAB4763002D15C0 /* DOMMediaList.mm in Sources */,
850657010AAB4763002D15C0 /* DOMStyleSheet.mm in Sources */,
850657030AAB4763002D15C0 /* DOMStyleSheetList.mm in Sources */,
</set>
<set append="1" var="WEBCORE_SOURCES_DERIVEDSOURCES">
- DerivedSources/WebCore/CharsetData.cpp
DerivedSources/WebCore/ColorData.c
DerivedSources/WebCore/CSSGrammar.cpp
DerivedSources/WebCore/HTMLEntityNames.c
platform/Arena.cpp
platform/ArrayImpl.cpp
platform/AtomicString.cpp
- platform/CharsetNames.cpp
platform/Color.cpp
platform/DeprecatedCString.cpp
platform/DeprecatedPtrListImpl.cpp
platform/StreamingTextDecoderICU.cpp
platform/String.cpp
platform/StringImpl.cpp
+ platform/TextCodecLatin1.cpp
+ platform/TextCodecUTF16.cpp
+ platform/TextDecoder.cpp
platform/TextEncoding.cpp
+ platform/TextEncodingRegistry.cpp
platform/TextStream.cpp
platform/Timer.cpp
platform/ResourceLoader.cpp
namespace WebCore {
-class DeprecatedString;
class Event;
class EventListener;
class Frame;
private:
RefPtr<KJS::ScriptInterpreter> m_script;
- Frame *m_frame;
+ Frame* m_frame;
int m_handlerLineno;
};
#ifndef KJS_WINDOW_H_
#define KJS_WINDOW_H_
-#include "DeprecatedString.h"
+#include "PlatformString.h"
#include "kjs_binding.h"
#include <wtf/HashMap.h>
public:
ScheduledAction(JSValue *func, const List& args)
: m_func(func), m_args(args) { }
- ScheduledAction(const WebCore::DeprecatedString& code)
+ ScheduledAction(const WebCore::String& code)
: m_code(code) { }
void execute(Window *);
private:
ProtectedPtr<JSValue> m_func;
List m_args;
- WebCore::DeprecatedString m_code;
+ WebCore::String m_code;
};
class Location : public DOMObject {
[a addObject:[NSData dataWithBytes:e.m_data.data() length:e.m_data.size()]];
} else {
ASSERT(e.m_type == FormDataElement::encodedFile);
- [a addObject:e.m_filename.getNSString()];
+ [a addObject:e.m_filename];
}
}
return a;
NSMutableDictionary* dashboardRegionsDictionary();
void dashboardRegionsChanged();
- virtual bool isCharacterSmartReplaceExempt(const DeprecatedChar &, bool);
+ virtual bool isCharacterSmartReplaceExempt(UChar, bool);
virtual bool mouseDownMayStartSelect() const { return _mouseDownMayStartSelect; }
[_bridge dashboardRegionsChanged:webRegions];
}
-bool FrameMac::isCharacterSmartReplaceExempt(const DeprecatedChar &c, bool isPreviousChar)
+bool FrameMac::isCharacterSmartReplaceExempt(UChar c, bool isPreviousChar)
{
- return [_bridge isCharacterSmartReplaceExempt:c.unicode() isPreviousCharacter:isPreviousChar];
+ return [_bridge isCharacterSmartReplaceExempt:c isPreviousCharacter:isPreviousChar];
}
void FrameMac::handledOnloadEvents()
using namespace EventNames;
using namespace HTMLNames;
+using WebCore::UChar;
+
// FIXME: This will eventually need to really localize.
#define UI_STRING(string, comment) ((NSString*)[NSString stringWithUTF8String:(string)])
return nil;
if (m_areaElement) {
- DeprecatedString summary = static_cast<Element*>(m_areaElement)->getAttribute(summaryAttr).deprecatedString();
+ const AtomicString& summary = static_cast<Element*>(m_areaElement)->getAttribute(summaryAttr);
if (!summary.isEmpty())
- return summary.getNSString();
- DeprecatedString title = static_cast<Element*>(m_areaElement)->getAttribute(titleAttr).deprecatedString();
+ return summary;
+ const AtomicString& title = static_cast<Element*>(m_areaElement)->getAttribute(titleAttr);
if (!title.isEmpty())
- return title.getNSString();
+ return title;
}
for (RenderObject* curr = m_renderer; curr; curr = curr->parent()) {
if (curr->element() && curr->element()->isHTMLElement()) {
- DeprecatedString summary = static_cast<Element*>(curr->element())->getAttribute(summaryAttr).deprecatedString();
+ const AtomicString& summary = static_cast<Element*>(curr->element())->getAttribute(summaryAttr);
if (!summary.isEmpty())
- return summary.getNSString();
- DeprecatedString title = static_cast<Element*>(curr->element())->getAttribute(titleAttr).deprecatedString();
+ return summary;
+ const AtomicString& title = static_cast<Element*>(curr->element())->getAttribute(titleAttr);
if (!title.isEmpty())
- return title.getNSString();
+ return title;
}
}
if (startVisiblePosition.isNull() || endVisiblePosition.isNull())
return nil;
- DeprecatedString qString = plainText(makeRange(startVisiblePosition, endVisiblePosition).get());
-
- // transform it to a CFString and return that
- return (id)qString.getCFString();
+ return plainText(makeRange(startVisiblePosition, endVisiblePosition).get()).getNSString();
}
if ([self isAttachment])
if (m_renderer->isImage()) {
if (m_renderer->element() && m_renderer->element()->isHTMLElement()) {
- DeprecatedString alt = static_cast<Element*>(m_renderer->element())->getAttribute(altAttr).deprecatedString();
- return !alt.isEmpty() ? alt.getNSString() : nil;
+ const AtomicString& alt = static_cast<Element*>(m_renderer->element())->getAttribute(altAttr);
+ if (alt.isEmpty())
+ return nil;
+ return alt;
}
} else if ([self isAttachment])
return [[self attachmentView] accessibilityAttributeValue:NSAccessibilityTitleAttribute];
return nil;
// get the visible text in the range
- DeprecatedString qString = plainText(makeRange(startVisiblePosition, endVisiblePosition).get());
-
- // transform it to a CFString and return that
- return (id)qString.getCFString();
+ return plainText(makeRange(startVisiblePosition, endVisiblePosition).get()).getNSString();
}
- (id)doAXTextMarkerForPosition: (NSPoint) point
+ (NSString *)decodeData:(NSData *)data
{
HTMLNames::init(); // this method is used for importing bookmarks at startup, so HTMLNames are likely to be uninitialized yet
- Decoder* decoder = new Decoder("text/html"); // bookmark files are HTML
- DeprecatedString result = decoder->decode(static_cast<const char *>([data bytes]), [data length]);
+ RefPtr<Decoder> decoder = new Decoder("text/html"); // bookmark files are HTML
+ String result = decoder->decode(static_cast<const char*>([data bytes]), [data length]);
result += decoder->flush();
- decoder->deref();
- return result.getNSString();
+ return result;
}
@end
- (void)setTextSizeMultiplier:(float)multiplier;
-- (CFStringEncoding)textEncoding;
-
- (NSString *)stringByEvaluatingJavaScriptFromString:(NSString *)string;
- (NSString *)stringByEvaluatingJavaScriptFromString:(NSString *)string forceUserGesture:(BOOL)forceUserGesture;
- (NSAppleEventDescriptor *)aeDescByEvaluatingJavaScriptFromString:(NSString *)string;
- (NSDictionary *)fontAttributesForSelectionStart;
- (NSWritingDirection)baseWritingDirectionForSelectionStart;
-+ (NSString *)stringWithData:(NSData *)data textEncoding:(CFStringEncoding)textEncoding;
-+ (NSString *)stringWithData:(NSData *)data textEncodingName:(NSString *)textEncodingName;
+- (NSString *)stringWithData:(NSData *)data; // using the encoding of the frame's main resource
++ (NSString *)stringWithData:(NSData *)data textEncodingName:(NSString *)textEncodingName; // nil for textEncodingName means Latin-1
- (BOOL)interceptKeyEvent:(NSEvent *)event toView:(NSView *)view;
#import "CharsetNames.h"
#import "DOMImplementation.h"
#import "DOMInternal.h"
+#import "Decoder.h"
#import "DeleteSelectionCommand.h"
#import "DocLoader.h"
#import "DocumentFragment.h"
#import "HTMLInputElement.h"
#import "HTMLNames.h"
#import "Image.h"
-#import "WebCoreEditCommand.h"
#import "LoaderFunctions.h"
-#import "WebCorePageState.h"
#import "ModifySelectionListLevel.h"
#import "MoveSelectionCommand.h"
#import "Page.h"
#import "PlugInInfoStore.h"
-#import "RenderView.h"
#import "RenderImage.h"
#import "RenderPart.h"
#import "RenderTreeAsText.h"
+#import "RenderView.h"
#import "RenderWidget.h"
#import "ReplaceSelectionCommand.h"
#import "Screen.h"
#import "SelectionController.h"
+#import "TextEncoding.h"
#import "TextIterator.h"
#import "TypingCommand.h"
+#import "WebCoreEditCommand.h"
#import "WebCorePageBridge.h"
+#import "WebCorePageState.h"
#import "WebCoreSettings.h"
#import "WebCoreSystemInterface.h"
#import "WebCoreViewFactory.h"
#import "WebCoreWidgetHolder.h"
+#import "XMLTokenizer.h"
#import "csshelper.h"
#import "htmlediting.h"
#import "kjs_proxy.h"
#import "kjs_window.h"
#import "markup.h"
#import "visible_units.h"
-#import "XMLTokenizer.h"
#import <JavaScriptCore/array_instance.h>
#import <JavaScriptCore/date_object.h>
#import <JavaScriptCore/runtime_root.h>
using KJS::Bindings::RootObject;
+using WebCore::UChar;
+
NSString *WebCorePageCacheStateKey = @"WebCorePageCacheState";
@interface WebCoreFrameBridge (WebCoreBridgeInternal)
- (void)setEncoding:(NSString *)encoding userChosen:(BOOL)userChosen
{
- m_frame->setEncoding(DeprecatedString::fromNSString(encoding), userChosen);
+ m_frame->setEncoding(encoding, userChosen);
}
- (void)addData:(NSData *)data
m_frame->setZoomFactor(newZoomFactor);
}
-- (CFStringEncoding)textEncoding
-{
- return WebCore::TextEncoding(m_frame->encoding().latin1()).encodingID();
-}
-
- (NSView *)nextKeyView
{
Document *doc = m_frame->document();
- (NSString *)stringByEvaluatingJavaScriptFromString:(NSString *)string forceUserGesture:(BOOL)forceUserGesture
{
m_frame->createEmptyDocument();
- JSValue* result = m_frame->executeScript(0, DeprecatedString::fromNSString(string), forceUserGesture);
+ JSValue* result = m_frame->executeScript(0, string, forceUserGesture);
if (!result || !result->isString())
return 0;
JSLock lock;
- (NSAppleEventDescriptor *)aeDescByEvaluatingJavaScriptFromString:(NSString *)string
{
m_frame->createEmptyDocument();
- JSValue* result = m_frame->executeScript(0, DeprecatedString::fromNSString(string), true);
+ JSValue* result = m_frame->executeScript(0, string, true);
if (!result) // FIXME: pass errors
return 0;
JSLock lock;
- (NSString *)referrer
{
- return m_frame->referrer().getNSString();
+ return m_frame->referrer();
}
- (WebCoreFrameBridge *)opener
- (void)setOpener:(WebCoreFrameBridge *)bridge;
{
- Frame *p = [bridge impl];
-
- if (p)
- p->setOpener(m_frame);
+ if (Frame* f = [bridge impl])
+ f->setOpener(m_frame);
}
-+ (NSString *)stringWithData:(NSData *)data textEncoding:(CFStringEncoding)textEncoding
+- (NSString *)stringWithData:(NSData *)data
{
- if (textEncoding == kCFStringEncodingInvalidId)
- textEncoding = kCFStringEncodingWindowsLatin1;
-
- return WebCore::TextEncoding(textEncoding).toUnicode((const char*)[data bytes], [data length]).getNSString();
+ Document* doc = m_frame->document();
+ if (!doc)
+ return nil;
+ Decoder* decoder = doc->decoder();
+ if (!decoder)
+ return nil;
+ return decoder->encoding().decode(reinterpret_cast<const char*>([data bytes]), [data length]);
}
+ (NSString *)stringWithData:(NSData *)data textEncodingName:(NSString *)textEncodingName
{
- CFStringEncoding textEncoding = WebCore::TextEncoding([textEncodingName lossyCString]).encodingID();
- return [WebCoreFrameBridge stringWithData:data textEncoding:textEncoding];
+ return WebCore::TextEncoding(textEncodingName).decode(reinterpret_cast<const char*>([data bytes]), [data length]);
}
- (BOOL)needsLayout
return;
bool addLeadingSpace = startPos.leadingWhitespacePosition(VP_DEFAULT_AFFINITY, true).isNull() && !isStartOfParagraph(startVisiblePos);
- if (addLeadingSpace) {
- DeprecatedChar previousChar = startVisiblePos.previous().characterAfter();
- if (previousChar.unicode())
+ if (addLeadingSpace)
+ if (UChar previousChar = startVisiblePos.previous().characterAfter())
addLeadingSpace = !m_frame->isCharacterSmartReplaceExempt(previousChar, true);
- }
bool addTrailingSpace = endPos.trailingWhitespacePosition(VP_DEFAULT_AFFINITY, true).isNull() && !isEndOfParagraph(endVisiblePos);
- if (addTrailingSpace) {
- DeprecatedChar thisChar = endVisiblePos.characterAfter();
- if (thisChar.unicode())
+ if (addTrailingSpace)
+ if (UChar thisChar = endVisiblePos.characterAfter())
addTrailingSpace = !m_frame->isCharacterSmartReplaceExempt(thisChar, false);
- }
// inspect source
bool hasWhitespaceAtStart = false;
if (!m_frame || !m_frame->document())
return 0;
- return [DOMDocumentFragment _documentFragmentWith:createFragmentFromMarkup(m_frame->document(),
- DeprecatedString::fromNSString(markupString), DeprecatedString::fromNSString(baseURLString)).get()];
+ return [DOMDocumentFragment _documentFragmentWith:createFragmentFromMarkup(m_frame->document(), markupString, baseURLString).get()];
}
- (DOMDocumentFragment *)documentFragmentWithText:(NSString *)text inContext:(DOMRange *)context
{
- return [DOMDocumentFragment _documentFragmentWith:createFragmentFromText([context _range], DeprecatedString::fromNSString(text)).get()];
+ return [DOMDocumentFragment _documentFragmentWith:createFragmentFromText([context _range], text).get()];
}
- (DOMDocumentFragment *)documentFragmentWithNodesAsParagraphs:(NSArray *)nodes
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "config.h"
+#import "config.h"
#import "WebCoreScriptDebugger.h"
-#import "WebScriptObjectPrivate.h"
-
-#import <JavaScriptCore/debugger.h>
-#import <JavaScriptCore/context.h>
#import "DeprecatedString.h"
#import "KURL.h"
+#import "PlatformString.h"
+#import "WebScriptObjectPrivate.h"
+#import <JavaScriptCore/context.h>
+#import <JavaScriptCore/debugger.h>
using namespace KJS;
using namespace WebCore;
- (id)evaluateWebScript:(NSString *)script
{
- UString code(DeprecatedString::fromNSString(script));
+ UString code = String(script);
ExecState *state = _state;
Interpreter *interp = state->dynamicInterpreter();
return;
[defaultTextEncoding release];
defaultTextEncoding = [s copy];
- settings->setEncoding(DeprecatedString::fromNSString(s));
+ settings->setEncoding(s);
}
- (NSString *)defaultTextEncoding
#include "Document.h"
#include "HTMLNames.h"
#include "MediaList.h"
+#include "MediaQuery.h"
+#include "MediaQueryExp.h"
#include "PlatformString.h"
#include "cssparser.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
-#include "MediaQuery.h"
-#include "MediaQueryExp.h"
#ifdef SVG_SUPPORT
#include "ksvgcssproperties.h"
using namespace WebCore;
using namespace HTMLNames;
-//
+using WebCore::UChar;
+
// The following file defines the function
// const struct props *findProp(const char *word, int len)
//
return;
// ### pass correct charset here!!
- m_cachedSheet = docLoader->requestStyleSheet(absHref, DeprecatedString::null);
+ m_cachedSheet = docLoader->requestStyleSheet(absHref, String());
if (m_cachedSheet) {
m_loading = true;
m_cachedSheet->ref(this);
int length;
UChar* t = text(&length);
-#ifdef TOKEN_DEBUG
- qDebug("CSSTokenizer: got token %d: '%s'", token, token == END_TOKEN ? "" : DeprecatedString((DeprecatedChar *)t, length).latin1());
-#endif
switch(token) {
case WHITESPACE:
case SGML_CD:
String Document::inputEncoding() const
{
if (Decoder* d = decoder())
- return d->encodingName();
+ return d->encoding().name();
return String();
}
{
if (!decoder())
return;
- decoder()->setEncodingName(charset.deprecatedString().ascii(), Decoder::UserChosenEncoding);
+ decoder()->setEncoding(charset, Decoder::UserChosenEncoding);
}
Element* Document::elementFromPoint(int x, int y) const
return static_cast<int>((currentTime() - m_startTime) * 1000);
}
-void Document::write(const String &text)
+void Document::write(const DeprecatedString& text)
{
- write(text.deprecatedString());
+ write(String(text));
}
-void Document::write(const DeprecatedString &text)
+void Document::write(const String& text)
{
#ifdef INSTRUMENT_LAYOUT_SCHEDULING
if (!ownerElement())
return m_elemSheet.get();
}
-void Document::determineParseMode(const DeprecatedString &/*str*/)
+void Document::determineParseMode(const String&)
{
- // For XML documents use strict parse mode. HTML docs will override this method to
- // determine their parse mode.
+ // For XML documents use strict parse mode.
+ // HTML overrides this method to determine the parse mode.
pMode = Strict;
hMode = XHtml;
}
enum ParseMode { Compat, AlmostStrict, Strict };
- virtual void determineParseMode( const DeprecatedString &str );
+ virtual void determineParseMode(const String&);
void setParseMode(ParseMode m) { pMode = m; }
ParseMode parseMode() const { return pMode; }
m_cachedSheet = document()->docLoader()->requestXSLStyleSheet(document()->completeURL(href));
else
#endif
- m_cachedSheet = document()->docLoader()->requestStyleSheet(document()->completeURL(href), DeprecatedString::null);
+ m_cachedSheet = document()->docLoader()->requestStyleSheet(document()->completeURL(href), String());
if (m_cachedSheet)
m_cachedSheet->ref( this );
#if KHTML_XSLT
#include "XMLTokenizer.h"
#include "CDATASection.h"
+#include "CString.h"
#include "Cache.h"
#include "CachedScript.h"
#include "Comment.h"
Document *m_doc;
FrameView *m_view;
- DeprecatedString m_originalSourceForTransform;
+ String m_originalSourceForTransform;
xmlParserCtxtPtr m_context;
Node *m_currentNode;
static bool shouldAllowExternalLoad(const char* inURI)
{
- DeprecatedString url(inURI);
-
- if (url.contains("/etc/xml/catalog")
- || url.startsWith("http://www.w3.org/Graphics/SVG")
- || url.startsWith("http://www.w3.org/TR/xhtml"))
+ if (strstr(inURI, "/etc/xml/catalog")
+ || strstr(inURI, "http://www.w3.org/Graphics/SVG") == inURI
+ || strstr(inURI, "http://www.w3.org/TR/xhtml") == inURI)
return false;
return true;
}
globalDocLoader = docLoader;
}
-static xmlParserCtxtPtr createQStringParser(xmlSAXHandlerPtr handlers, void *userData)
+static xmlParserCtxtPtr createStringParser(xmlSAXHandlerPtr handlers, void* userData)
{
static bool didInit = false;
if (!didInit) {
xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
parser->_private = userData;
parser->replaceEntities = true;
- const DeprecatedChar BOM(0xFEFF);
- const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char *>(&BOM);
+ const UChar BOM = 0xFEFF;
+ const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
return parser;
}
m_currentNodeIsReferenced = nodeNeedsReference;
}
-bool XMLTokenizer::write(const SegmentedString &s, bool /*appendData*/ )
+bool XMLTokenizer::write(const SegmentedString& s, bool /*appendData*/)
{
- DeprecatedString parseString = s.toString();
+ String parseString = s.toString();
if (m_sawXSLTransform || !m_sawFirstElement)
m_originalSourceForTransform += parseString;
// resetting the encoding to UTF-16 before every chunk. Otherwise libxml
// will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
// and switch encodings, causing the parse to fail.
- const DeprecatedChar BOM(0xFEFF);
- const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char *>(&BOM);
+ const UChar BOM = 0xFEFF;
+ const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
xmlSwitchEncoding(m_context, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
-
- xmlParseChunk(m_context, reinterpret_cast<const char *>(parseString.unicode()), sizeof(DeprecatedChar) * parseString.length(), 0);
+
+ xmlParseChunk(m_context, reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
}
return false;
}
-inline DeprecatedString toQString(const xmlChar *str, unsigned int len)
-{
- return DeprecatedString::fromUtf8(reinterpret_cast<const char *>(str), len);
-}
-
-inline DeprecatedString toQString(const xmlChar *str)
-{
- return DeprecatedString::fromUtf8(str ? reinterpret_cast<const char *>(str) : "");
-}
-
-inline String toString(const xmlChar* str, unsigned int len)
+inline String toString(const xmlChar* str, unsigned len)
{
- return DeprecatedString::fromUtf8(reinterpret_cast<const char *>(str), len);
+ return UTF8Encoding().decode(reinterpret_cast<const char*>(str), len);
}
inline String toString(const xmlChar* str)
{
- return DeprecatedString::fromUtf8(str ? reinterpret_cast<const char *>(str) : "");
+ const char* cstr = str ? reinterpret_cast<const char*>(str) : "";
+ return UTF8Encoding().decode(cstr, strlen(cstr));
}
struct _xmlSAX2Namespace {
{
xmlSAX2Attributes *attributes = reinterpret_cast<xmlSAX2Attributes *>(libxmlAttributes);
for(int i = 0; i < nb_attributes; i++) {
- String attrLocalName = toQString(attributes[i].localname);
+ String attrLocalName = toString(attributes[i].localname);
int valueLength = (int) (attributes[i].end - attributes[i].value);
- String attrValue = toQString(attributes[i].value, valueLength);
- String attrPrefix = toQString(attributes[i].prefix);
- String attrURI = attrPrefix.isEmpty() ? String() : toQString(attributes[i].uri);
+ String attrValue = toString(attributes[i].value, valueLength);
+ String attrPrefix = toString(attributes[i].prefix);
+ String attrURI = attrPrefix.isEmpty() ? String() : toString(attributes[i].uri);
String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
newElement->setAttributeNS(attrURI, attrQName, attrValue, ec);
exitText();
- String localName = toQString(xmlLocalName);
- String uri = toQString(xmlURI);
- String prefix = toQString(xmlPrefix);
+ String localName = toString(xmlLocalName);
+ String uri = toString(xmlURI);
+ String prefix = toString(xmlPrefix);
String qName = prefix.isEmpty() ? localName : prefix + ":" + localName;
if (m_parsingFragment && uri.isEmpty()) {
if (!scriptHref.isEmpty()) {
// we have a src attribute
- DeprecatedString charset = scriptElement->getAttribute(charsetAttr).deprecatedString();
-
+ const AtomicString& charset = scriptElement->getAttribute(charsetAttr);
if ((m_pendingScript = m_doc->docLoader()->requestScript(scriptHref, charset))) {
m_scriptElement = scriptElement;
m_pendingScript->ref(this);
m_scriptElement = 0;
} else {
- DeprecatedString scriptCode = "";
+ String scriptCode = "";
for (Node *child = scriptElement->firstChild(); child; child = child->nextSibling()) {
if (child->isTextNode() || child->nodeType() == Node::CDATA_SECTION_NODE)
- scriptCode += static_cast<CharacterData*>(child)->data().deprecatedString();
+ scriptCode += static_cast<CharacterData*>(child)->data();
}
-
m_view->frame()->executeScript(0, scriptCode);
}
if (m_currentNode->isTextNode() || enterText()) {
ExceptionCode ec = 0;
- static_cast<Text*>(m_currentNode)->appendData(toQString(s, len), ec);
+ static_cast<Text*>(m_currentNode)->appendData(toString(s, len), ec);
}
}
// ### handle exceptions
int exception = 0;
RefPtr<ProcessingInstruction> pi = m_doc->createProcessingInstruction(
- toQString(target), toQString(data), exception);
+ toString(target), toString(data), exception);
if (exception)
return;
exitText();
- RefPtr<Node> newNode = new CDATASection(m_doc, toQString(s, len));
+ RefPtr<Node> newNode = new CDATASection(m_doc, toString(s, len));
if (!m_currentNode->addChild(newNode.get()))
return;
if (m_view && !newNode->attached())
exitText();
- RefPtr<Node> newNode = new Comment(m_doc, toQString(s));
+ RefPtr<Node> newNode = new Comment(m_doc, toString(s));
m_currentNode->addChild(newNode.get());
if (m_view && !newNode->attached())
newNode->attach();
if (!doc)
return;
- doc->setDocType(new DocumentType(doc, toQString(name), toQString(externalID), toQString(systemID)));
+ doc->setDocType(new DocumentType(doc, toString(name), toString(externalID), toString(systemID)));
}
inline XMLTokenizer *getTokenizer(void *closure)
static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
{
- unsigned short c = decodeNamedEntity(reinterpret_cast<const char*>(name));
+ UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name));
if (!c)
return 0;
- DeprecatedCString value = DeprecatedString(DeprecatedChar(c)).utf8();
+ CString value = String(&c, 1).utf8();
assert(value.length() < 5);
sharedXHTMLEntity.length = value.length();
sharedXHTMLEntity.name = name;
static void externalSubsetHandler(void *closure, const xmlChar *name, const xmlChar *externalId, const xmlChar *systemId)
{
- DeprecatedString extId = toQString(externalId);
+ String extId = toString(externalId);
if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
|| (extId == "-//W3C//DTD XHTML 1.1//EN")
|| (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
m_sawError = false;
m_sawXSLTransform = false;
m_sawFirstElement = false;
- m_context = createQStringParser(&sax, this);
+ m_context = createStringParser(&sax, this);
}
void XMLTokenizer::end()
if (errorOccurred)
EventTargetNodeCast(e.get())->dispatchHTMLEvent(errorEvent, true, false);
else {
- m_view->frame()->executeScript(cachedScriptUrl, 0, 0, scriptSource.deprecatedString());
+ m_view->frame()->executeScript(cachedScriptUrl, 0, 0, scriptSource);
EventTargetNodeCast(e.get())->dispatchHTMLEvent(loadEvent, false, false);
}
}
#ifdef KHTML_XSLT
-void *xmlDocPtrForString(DocLoader* docLoader, const DeprecatedString &source, const DeprecatedString &url)
+void* xmlDocPtrForString(DocLoader* docLoader, const String& source, const DeprecatedString &url)
{
if (source.isEmpty())
- return 0;
+ return 0;
+
// Parse in a single chunk into an xmlDocPtr
// FIXME: Hook up error handlers so that a failure to parse the main document results in
// good error messages.
- const DeprecatedChar BOM(0xFEFF);
- const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char *>(&BOM);
+ const UChar BOM = 0xFEFF;
+ const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
xmlGenericErrorFunc oldErrorFunc = xmlGenericError;
void* oldErrorContext = xmlGenericErrorContext;
setLoaderForLibXMLCallbacks(docLoader);
xmlSetGenericErrorFunc(0, errorFunc);
- xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char *>(source.unicode()),
- source.length() * sizeof(DeprecatedChar),
+ xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
+ source.length() * sizeof(UChar),
url.ascii(),
BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
XSLT_PARSE_OPTIONS);
sax.warning = balancedWarningHandler;
sax.initialized = XML_SAX2_MAGIC;
- int result = xmlParseBalancedChunkMemory(0, &sax, &tokenizer, 0,
- (const xmlChar*)(const char*)(string.deprecatedString().utf8()), 0);
+ int result = xmlParseBalancedChunkMemory(0, &sax, &tokenizer, 0, (const xmlChar*)(const char*)(string.utf8()), 0);
return result == 0;
}
xmlSAX2Attributes *attributes = reinterpret_cast<xmlSAX2Attributes *>(libxmlAttributes);
for(int i = 0; i < nb_attributes; i++) {
- DeprecatedString attrLocalName = toQString(attributes[i].localname);
+ String attrLocalName = toString(attributes[i].localname);
int valueLength = (int) (attributes[i].end - attributes[i].value);
String attrValue = toString(attributes[i].value, valueLength);
String attrPrefix = toString(attributes[i].prefix);
memset(&sax, 0, sizeof(sax));
sax.startElementNs = attributesStartElementNsHandler;
sax.initialized = XML_SAX2_MAGIC;
- xmlParserCtxtPtr parser = createQStringParser(&sax, &state);
- DeprecatedString parseString = "<?xml version=\"1.0\"?><attrs " + string.deprecatedString() + " />";
- xmlParseChunk(parser, reinterpret_cast<const char *>(parseString.unicode()), parseString.length() * sizeof(DeprecatedChar), 1);
+ xmlParserCtxtPtr parser = createStringParser(&sax, &state);
+ String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
+ xmlParseChunk(parser, reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
if (parser->myDoc)
xmlFreeDoc(parser->myDoc);
xmlFreeParserCtxt(parser);
Tokenizer* newXMLTokenizer(Document*, FrameView* = 0);
#if KHTML_XSLT
-void* xmlDocPtrForString(DocLoader*, const DeprecatedString& source, const DeprecatedString& URL);
+void* xmlDocPtrForString(DocLoader*, const String& source, const DeprecatedString& URL);
void setLoaderForLibXMLCallbacks(DocLoader*);
#endif
HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
#include "config.h"
#include "FormDataList.h"
+#include "DeprecatedCString.h"
+
namespace WebCore {
FormDataList::FormDataList(const TextEncoding& c)
{
}
// Appends an already-encoded byte string to m_list as-is; no encoding or
// line-break fixing is applied on this overload.
-void FormDataList::appendString(const DeprecatedCString &s)
+void FormDataList::appendString(const CString &s)
{
m_list.append(s);
}
return result;
}
// Encodes s with m_encoding, passes the resulting bytes through fixLineBreaks(),
// and appends them to m_list as a data item.
// NOTE(review): the meaning of the trailing `true` argument to the encoder is not
// visible in this hunk — confirm against TextEncoding.
-void FormDataList::appendString(const DeprecatedString &s)
+void FormDataList::appendString(const String& s)
{
- DeprecatedCString cstr = fixLineBreaks(m_encoding.fromUnicode(s, true));
+ DeprecatedCString cstr = fixLineBreaks(m_encoding.encode(s.characters(), s.length(), true).deprecatedCString());
// NOTE(review): presumably trims the buffer so its size no longer counts a
// trailing NUL — confirm against DeprecatedCString::truncate().
cstr.truncate(cstr.length());
- m_list.append(cstr);
+ m_list.append(CString(cstr));
}
// Adds a file field: the key goes through appendString() (and is therefore
// encoded), then the file name is appended to m_list directly as a String.
// NOTE(review): appending a String is expected to create a path item rather
// than a data item — confirm against FormDataListItem's constructors.
void FormDataList::appendFile(const String &key, const String &filename)
{
- appendString(key.deprecatedString());
- m_list.append(filename.deprecatedString());
+ appendString(key);
+ m_list.append(filename);
}
} // namespace
#ifndef HTML_FormDataList_h
#define HTML_FormDataList_h
+#include "CString.h"
#include "DeprecatedValueList.h"
#include "PlatformString.h"
#include "TextEncoding.h"
namespace WebCore {
// One entry of a FormDataList. Each constructor initializes exactly one member:
// the byte-string constructor sets m_data, the string constructor sets m_path.
// The other member is left default-constructed.
struct FormDataListItem {
- FormDataListItem(const DeprecatedCString& data) : m_data(data) { }
- FormDataListItem(const DeprecatedString& path) : m_path(path) { }
+ FormDataListItem(const CString& data) : m_data(data) { }
+ FormDataListItem(const String& path) : m_path(path) { }
- DeprecatedString m_path;
- DeprecatedCString m_data;
+ String m_path;
+ CString m_data;
};
class FormDataList {
FormDataList(const TextEncoding&);
void appendData(const String& key, const String& value)
- { appendString(key.deprecatedString()); appendString(value.deprecatedString()); }
- void appendData(const String& key, const DeprecatedString& value)
- { appendString(key.deprecatedString()); appendString(value); }
- void appendData(const String& key, const DeprecatedCString& value)
- { appendString(key.deprecatedString()); appendString(value); }
+ { appendString(key); appendString(value); }
+ void appendData(const String& key, const CString& value)
+ { appendString(key); appendString(value); }
void appendData(const String& key, int value)
- { appendString(key.deprecatedString()); appendString(DeprecatedString::number(value)); }
+ { appendString(key); appendString(String::number(value)); }
void appendFile(const String& key, const String& filename);
DeprecatedValueListConstIterator<FormDataListItem> begin() const
{ return m_list.end(); }
private:
- void appendString(const DeprecatedCString&);
- void appendString(const DeprecatedString&);
+ void appendString(const CString&);
+ void appendString(const String&);
TextEncoding m_encoding;
DeprecatedValueList<FormDataListItem> m_list;
#include "HTMLDocument.h"
#include "CSSPropertyNames.h"
+#include "CString.h"
#include "CookieJar.h"
#include "DocumentType.h"
#include "ExceptionCode.h"
const int PARSEMODE_HAVE_SYSTEM_ID = (1<<2);
const int PARSEMODE_HAVE_INTERNAL = (1<<3);
-static int parseDocTypePart(const DeprecatedString& buffer, int index)
+static int parseDocTypePart(const String& buffer, int index)
{
while (true) {
- DeprecatedChar ch = buffer[index];
+ UChar ch = buffer[index];
if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
++index;
else if (ch == '-') {
}
}
// Returns true when buffer, starting at offset, begins with str. Both sides are
// lowercased before comparing, so the match ignores case. Returns false without
// comparing when fewer than strlen(str) characters remain after offset.
-static bool containsString(const char* str, const DeprecatedString& buffer, int offset)
+static bool containsString(const char* str, const String& buffer, int offset)
{
- DeprecatedString startString(str);
+ String startString(str);
// Bounds guard: the candidate window must fit entirely inside buffer.
if (offset + startString.length() > buffer.length())
return false;
- DeprecatedString bufferString = buffer.mid(offset, startString.length()).lower();
- DeprecatedString lowerStart = startString.lower();
+ String bufferString = buffer.substring(offset, startString.length()).lower();
+ String lowerStart = startString.lower();
return bufferString.startsWith(lowerStart);
}
-static bool parseDocTypeDeclaration(const DeprecatedString& buffer,
+static bool parseDocTypeDeclaration(const String& buffer,
int* resultFlags,
- DeprecatedString& name,
- DeprecatedString& publicID,
- DeprecatedString& systemID)
+ String& name,
+ String& publicID,
+ String& systemID)
{
bool haveDocType = false;
*resultFlags = 0;
do {
index = buffer.find('<', index);
if (index == -1) break;
- DeprecatedChar nextChar = buffer[index+1];
+ UChar nextChar = buffer[index+1];
if (nextChar == '!') {
if (containsString("doctype", buffer, index+2)) {
haveDocType = true;
if (!containsString("html", buffer, index))
return false;
- name = buffer.mid(index, 4);
+ name = buffer.substring(index, 4);
index = parseDocTypePart(buffer, index+4);
bool hasPublic = containsString("public", buffer, index);
if (hasPublic) {
// We've read <!DOCTYPE HTML PUBLIC (not case sensitive).
// Now we find the beginning and end of the public identifiers
// and system identifiers (assuming they're even present).
- DeprecatedChar theChar = buffer[index];
+ UChar theChar = buffer[index];
if (theChar != '\"' && theChar != '\'')
return false;
if (publicIDEnd == -1)
return false;
index = parseDocTypePart(buffer, publicIDEnd+1);
- DeprecatedChar next = buffer[index];
+ UChar next = buffer[index];
if (next == '>') {
// Public identifier present, but no system identifier.
// Do nothing. Note that this is the most common
int systemIDEnd = buffer.find(next, systemIDStart);
if (systemIDEnd == -1)
return false;
- systemID = buffer.mid(systemIDStart, systemIDEnd - systemIDStart);
+ systemID = buffer.substring(systemIDStart, systemIDEnd - systemIDStart);
}
else if (next == '[') {
// We found an internal subset.
return false; // Something's wrong.
// We need to trim whitespace off the public identifier.
- publicID = buffer.mid(publicIDStart, publicIDEnd - publicIDStart);
- publicID = publicID.stripWhiteSpace();
+ publicID = buffer.substring(publicIDStart, publicIDEnd - publicIDStart);
+ publicID = publicID.deprecatedString().stripWhiteSpace();
*resultFlags |= PARSEMODE_HAVE_PUBLIC_ID;
} else {
if (containsString("system", buffer, index)) {
// Doctype has a system ID but no public ID
*resultFlags |= PARSEMODE_HAVE_SYSTEM_ID;
index = parseDocTypePart(buffer, index+6);
- DeprecatedChar next = buffer[index];
+ UChar next = buffer[index];
if (next != '\"' && next != '\'')
return false;
int systemIDStart = index+1;
int systemIDEnd = buffer.find(next, systemIDStart);
if (systemIDEnd == -1)
return false;
- systemID = buffer.mid(systemIDStart, systemIDEnd - systemIDStart);
+ systemID = buffer.substring(systemIDStart, systemIDEnd - systemIDStart);
index = parseDocTypePart(buffer, systemIDEnd+1);
}
- DeprecatedChar nextChar = buffer[index];
+ UChar nextChar = buffer[index];
if (nextChar == '[')
*resultFlags |= PARSEMODE_HAVE_INTERNAL;
else if (nextChar != '>')
return true;
}
-void HTMLDocument::determineParseMode( const DeprecatedString &str )
+void HTMLDocument::determineParseMode(const String& str)
{
// This code more or less mimics Mozilla's implementation (specifically the
// doctype parsing implemented by David Baron in Mozilla's nsParser.cpp).
// STRICT - no quirks apply. Web pages will obey the specifications to
// the letter.
- DeprecatedString name, systemID, publicID;
+ String name, systemID, publicID;
int resultFlags = 0;
if (parseDocTypeDeclaration(str, &resultFlags, name, publicID, systemID)) {
if (resultFlags & PARSEMODE_HAVE_DOCTYPE)
else {
// We have to check a list of public IDs to see what we
// should do.
- DeprecatedString lowerPubID = publicID.lower();
- const char* pubIDStr = lowerPubID.latin1();
+ String lowerPubID = publicID.lower();
+ CString pubIDStr = lowerPubID.latin1();
// Look up the entry in our gperf-generated table.
- const PubIDInfo* doctypeEntry = findDoctypeEntry(pubIDStr, publicID.length());
+ const PubIDInfo* doctypeEntry = findDoctypeEntry(pubIDStr, pubIDStr.length());
if (!doctypeEntry) {
// The DOCTYPE is not in the list. Assume strict mode.
pMode = Strict;
*
*/
-#ifndef HTML_DOCUMENTIMPL_H
-#define HTML_DOCUMENTIMPL_H
+#ifndef HTMLDocument_h
+#define HTMLDocument_h
#include "CachedResourceClient.h"
#include "Document.h"
namespace WebCore {
-class DeprecatedString;
class FrameView;
class HTMLElement;
virtual PassRefPtr<Element> createElement(const String& tagName, ExceptionCode&);
- virtual void determineParseMode(const DeprecatedString&);
+ virtual void determineParseMode(const String&);
void addNamedItem(const String& name);
void removeNamedItem(const String& name);
#include "config.h"
#include "HTMLFormElement.h"
+#include "CString.h"
#include "EventNames.h"
#include "FormDataList.h"
#include "Frame.h"
using namespace EventNames;
using namespace HTMLNames;
-HTMLFormElement::HTMLFormElement(Document *doc)
+HTMLFormElement::HTMLFormElement(Document* doc)
: HTMLElement(formTag, doc)
{
collectionInfo = 0;
prepareSubmit();
}
-static DeprecatedCString encodeCString(const DeprecatedCString& e)
+static DeprecatedCString encodeCString(const CString& cstr)
{
+ DeprecatedCString e = cstr.deprecatedCString();
+
// http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1
- // safe characters like NS handles them for compatibility
+ // same safe characters as Netscape for compatibility
static const char *safe = "-._*";
int elen = e.length();
- DeprecatedCString encoded(( elen+e.contains( '\n' ) )*3+1);
+ DeprecatedCString encoded((elen + e.contains('\n')) * 3 + 1);
int enclen = 0;
- for(int pos = 0; pos < elen; pos++) {
+ for (int pos = 0; pos < elen; pos++) {
unsigned char c = e[pos];
- if ( (( c >= 'A') && ( c <= 'Z')) ||
- (( c >= 'a') && ( c <= 'z')) ||
- (( c >= '0') && ( c <= '9')) ||
- (strchr(safe, c))
- )
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || strchr(safe, c))
encoded[enclen++] = c;
- else if ( c == ' ' )
+ else if (c == ' ')
encoded[enclen++] = '+';
- else if ( c == '\n' || ( c == '\r' && e[pos+1] != '\n' ) )
- {
+ else if (c == '\n' || (c == '\r' && e[pos + 1] != '\n')) {
encoded[enclen++] = '%';
encoded[enclen++] = '0';
encoded[enclen++] = 'D';
encoded[enclen++] = '%';
encoded[enclen++] = '0';
encoded[enclen++] = 'A';
- }
- else if ( c != '\r' )
- {
+ } else if (c != '\r') {
encoded[enclen++] = '%';
unsigned int h = c / 16;
h += (h > 9) ? ('A' - 10) : '0';
return encoded;
}
-bool HTMLFormElement::formData(FormData &form_data) const
+bool HTMLFormElement::formData(FormData& result) const
{
DeprecatedCString enc_string = ""; // used for non-multipart data
DeprecatedString str = m_acceptcharset.deprecatedString();
str.replace(',', ' ');
DeprecatedStringList charsets = DeprecatedStringList::split(' ', str);
- TextEncoding encoding(InvalidEncoding);
- Frame *frame = document()->frame();
- for (DeprecatedStringList::Iterator it = charsets.begin(); it != charsets.end(); ++it) {
- if ((encoding = TextEncoding((*it).latin1())).isValid())
+ TextEncoding encoding;
+ Frame* frame = document()->frame();
+ for (DeprecatedStringList::Iterator it = charsets.begin(); it != charsets.end(); ++it)
+ if ((encoding = TextEncoding(*it)).isValid())
break;
- }
-
if (!encoding.isValid()) {
if (frame)
- encoding = TextEncoding(frame->encoding().latin1());
+ encoding = frame->encoding();
else
- encoding = TextEncoding(Latin1Encoding);
+ encoding = Latin1Encoding();
}
for (unsigned i = 0; i < formElements.size(); ++i) {
// but only if it's the first entry
if ( enc_string.isEmpty() && (*it).m_data == "isindex" ) {
++it;
- enc_string += encodeCString( (*it).m_data );
+ enc_string += encodeCString((*it).m_data);
}
else {
if(!enc_string.isEmpty())
// things if the filename includes characters you can't encode
// in the website's character set.
hstr += "; filename=\"";
- hstr += encoding.fromUnicode(path.mid(path.findRev('/') + 1), true);
+ int start = path.findRev('/') + 1;
+ int length = path.length() - start;
+ hstr += encoding.encode(reinterpret_cast<const UChar*>(path.unicode() + start), length, true);
hstr += "\"";
if (!static_cast<HTMLInputElement*>(current)->value().isEmpty()) {
++it;
// append body
- form_data.appendData(hstr.data(), hstr.length());
- const FormDataListItem &item = *it;
- size_t dataSize = item.m_data.size();
- if (dataSize != 0)
- form_data.appendData(item.m_data, dataSize - 1);
+ result.appendData(hstr.data(), hstr.length());
+ const FormDataListItem& item = *it;
+ if (size_t dataSize = item.m_data.length())
+ result.appendData(item.m_data, dataSize);
else if (!item.m_path.isEmpty())
- form_data.appendFile(item.m_path);
- form_data.appendData("\r\n", 2);
+ result.appendFile(item.m_path);
+ result.appendData("\r\n", 2);
}
}
}
if (m_multipart)
enc_string = ("--" + m_boundary.deprecatedString() + "--\r\n").ascii();
- form_data.appendData(enc_string.data(), enc_string.length());
+ result.appendData(enc_string.data(), enc_string.length());
return true;
}
if (!m_post)
m_multipart = false;
- FormData form_data;
- if (formData(form_data)) {
- if(m_post)
- frame->submitForm("post", m_url, form_data, m_target, enctype(), boundary());
+ FormData postData;
+ if (formData(postData)) {
+ if (m_post)
+ frame->submitForm("post", m_url, postData, m_target, enctype(), boundary());
else
- frame->submitForm("get", m_url, form_data, m_target);
+ frame->submitForm("get", m_url, postData, m_target);
}
if (needButtonActivation && firstSuccessfulSubmitButton)
// If no filename at all is entered, return successful but empty.
// Null would be more logical, but Netscape posts an empty file. Argh.
if (value().isEmpty()) {
- encoding.appendData(name(), DeprecatedString(""));
+ encoding.appendData(name(), String(""));
return true;
}
const AtomicString& url = attr->value();
if (!url.isEmpty()) {
- DeprecatedString charset = getAttribute(charsetAttr).deprecatedString();
- m_cachedScript = document()->docLoader()->requestScript(url, charset);
+ m_cachedScript = document()->docLoader()->requestScript(url, getAttribute(charsetAttr));
m_cachedScript->ref(this);
}
} else if (attrName == onloadAttr)
#endif
// The parser might have been stopped by for example a window.close call in an earlier script.
// If so, we don't want to load scripts.
- if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(scriptSrc, scriptSrcCharset) ))
+ if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(scriptSrc, scriptSrcCharset)))
pendingScripts.enqueue(cs);
else
scriptNode = 0;
Attribute* a = 0;
bool foundTypeAttribute = false;
scriptSrc = DeprecatedString::null;
- scriptSrcCharset = DeprecatedString::null;
+ scriptSrcCharset = String();
if ( currToken.attrs && /* potentially have a ATTR_SRC ? */
m_doc->frame() &&
m_doc->frame()->jScriptEnabled() && /* jscript allowed at all? */
scriptSrc = m_doc->completeURL(parseURL(a->value()).deprecatedString());
if ((a = currToken.attrs->getAttributeItem(charsetAttr)))
scriptSrcCharset = a->value().deprecatedString().stripWhiteSpace();
- if ( scriptSrcCharset.isEmpty() )
+ if (scriptSrcCharset.isEmpty())
scriptSrcCharset = m_doc->frame()->encoding();
/* Check type before language, since language is deprecated */
if ((a = currToken.attrs->getAttributeItem(typeAttr)) != 0 && !a->value().isEmpty())
return true;
}
-bool HTMLTokenizer::write(const SegmentedString &str, bool appendData)
+bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
{
#ifdef TOKEN_DEBUG
kdDebug( 6036 ) << this << " Tokenizer::write(\"" << str.toString() << "\"," << appendData << ")" << endl;
scriptCode[scriptCodeSize] = 0;
scriptCode[scriptCodeSize + 1] = 0;
int pos;
- DeprecatedString food;
+ String food;
if (m_state.inScript() || m_state.inStyle())
- food.setUnicode(reinterpret_cast<DeprecatedChar*>(scriptCode), scriptCodeSize);
+ food = String(scriptCode, scriptCodeSize);
else if (m_state.inServer()) {
food = "<";
- food += DeprecatedString(reinterpret_cast<DeprecatedChar*>(scriptCode), scriptCodeSize);
+ food.append(String(scriptCode, scriptCodeSize));
} else {
pos = DeprecatedConstString(reinterpret_cast<DeprecatedChar*>(scriptCode), scriptCodeSize).string().find('>');
- food.setUnicode(reinterpret_cast<DeprecatedChar*>(scriptCode) + pos + 1, scriptCodeSize - pos - 1); // deep copy
+ food = String(scriptCode + pos + 1, scriptCodeSize - pos - 1);
}
fastFree(scriptCode);
scriptCode = 0;
src.resetLineCount();
}
// Parses source into fragment using a local HTMLTokenizer. The tokenizer is put
// in forced-synchronous mode, fed the whole source in one write() call, and then
// finished; the assertion checks that no processing is still pending afterwards.
-void parseHTMLDocumentFragment(const String &source, DocumentFragment *fragment)
+void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment)
{
HTMLTokenizer tok(fragment);
tok.setForceSynchronous(true);
- tok.write(source.deprecatedString(), true);
+ tok.write(source, true);
tok.finish();
ASSERT(!tok.processingData()); // make sure we're done (see 3963151)
}
bool noMoreData;
// URL to get source code of script from
DeprecatedString scriptSrc;
- DeprecatedString scriptSrcCharset;
+ String scriptSrcCharset;
bool javascript;
// the HTML code we will parse after the external script we are waiting for has loaded
SegmentedString pendingSrc;
return static_cast<CachedImage *>(o);
}
-CachedCSSStyleSheet* Cache::requestStyleSheet(DocLoader* dl, const String& url, bool reload, time_t expireDate, const DeprecatedString& charset)
+CachedCSSStyleSheet* Cache::requestStyleSheet(DocLoader* dl, const String& url, bool reload, time_t expireDate, const String& charset)
{
// this brings the _url to a standard form...
KURL kurl;
return static_cast<CachedCSSStyleSheet *>(o);
}
-CachedScript* Cache::requestScript(DocLoader* dl, const String& url, bool reload, time_t expireDate, const DeprecatedString& charset)
+CachedScript* Cache::requestScript(DocLoader* dl, const String& url, bool reload, time_t expireDate, const String& charset)
{
// this brings the _url to a standard form...
KURL kurl;
*/
static CachedImage* requestImage(DocLoader*, const String& URL, bool reload = false, time_t expireDate = 0);
static CachedImage* requestImage(DocLoader*, const KURL& URL, bool reload, time_t expireDate);
- static CachedCSSStyleSheet* requestStyleSheet(DocLoader*, const String& URL, bool reload, time_t expireDate, const DeprecatedString& charset);
- static CachedScript* requestScript(DocLoader*, const String& URL, bool reload, time_t expireDate, const DeprecatedString& charset);
+ static CachedCSSStyleSheet* requestStyleSheet(DocLoader*, const String& URL, bool reload, time_t expireDate, const String& charset);
+ static CachedScript* requestScript(DocLoader*, const String& URL, bool reload, time_t expireDate, const String& charset);
#ifdef KHTML_XSLT
static CachedXSLStyleSheet* requestXSLStyleSheet(DocLoader*, const String& URL, bool reload, time_t expireDate);
#include "CachedResourceClient.h"
#include "CachedResourceClientWalker.h"
#include "Decoder.h"
+#include "DeprecatedString.h"
#include "LoaderFunctions.h"
#include "loader.h"
#include <wtf/Vector.h>
namespace WebCore {
-CachedCSSStyleSheet::CachedCSSStyleSheet(DocLoader* dl, const String &url, CachePolicy cachePolicy, time_t _expireDate, const DeprecatedString& charset)
+CachedCSSStyleSheet::CachedCSSStyleSheet(DocLoader* dl, const String& url, CachePolicy cachePolicy, time_t _expireDate, const String& charset)
: CachedResource(url, CSSStyleSheet, cachePolicy, _expireDate)
- , m_decoder(new Decoder("text/css"))
+ , m_decoder(new Decoder("text/css", charset))
{
// It's css we want.
setAccept("text/css");
m_loading = true;
}
-CachedCSSStyleSheet::CachedCSSStyleSheet(const String &url, const DeprecatedString &stylesheet_data)
+CachedCSSStyleSheet::CachedCSSStyleSheet(const String &url, const String& stylesheet_data)
: CachedResource(url, CSSStyleSheet, CachePolicyVerify, 0, stylesheet_data.length())
{
m_loading = false;
m_status = Persistent;
- m_sheet = String(stylesheet_data);
+ m_sheet = stylesheet_data;
}
CachedCSSStyleSheet::~CachedCSSStyleSheet()
delete this;
}
// Hands the charset name to the decoder, tagged as coming from the HTTP header
// (Decoder::EncodingFromHTTPHeader).
// NOTE(review): the old code skipped the call when chs was empty; the new code
// always calls setEncoding(). Confirm that setEncoding() ignores an empty or
// unrecognized name, otherwise an empty header charset could clobber the
// charset passed to the constructor.
-void CachedCSSStyleSheet::setCharset(const DeprecatedString& chs)
+void CachedCSSStyleSheet::setCharset(const String& chs)
{
- if (!chs.isEmpty())
- m_decoder->setEncodingName(chs.latin1(), Decoder::EncodingFromHTTPHeader);
+ m_decoder->setEncoding(chs, Decoder::EncodingFromHTTPHeader);
}
void CachedCSSStyleSheet::data(Vector<char>& data, bool allDataReceived)
#include "TextEncoding.h"
#include <wtf/Vector.h>
-namespace WebCore
-{
+namespace WebCore {
+
class DocLoader;
class Decoder;
- class CachedCSSStyleSheet : public CachedResource
- {
+ class CachedCSSStyleSheet : public CachedResource {
public:
- CachedCSSStyleSheet(DocLoader*, const String& URL, CachePolicy, time_t expireDate, const DeprecatedString& charset);
- CachedCSSStyleSheet(const String& URL, const DeprecatedString& stylesheetData);
+ CachedCSSStyleSheet(DocLoader*, const String& URL, CachePolicy, time_t expireDate, const String& charset);
+ CachedCSSStyleSheet(const String& URL, const String& stylesheetData);
virtual ~CachedCSSStyleSheet();
const String& sheet() const { return m_sheet; }
virtual void ref(CachedResourceClient*);
virtual void deref(CachedResourceClient*);
- virtual void setCharset(const DeprecatedString&);
+ virtual void setCharset(const String&);
virtual void data(Vector<char>&, bool allDataReceived);
virtual void error();
#ifndef CachedResource_h
#define CachedResource_h
-#include "DeprecatedString.h"
#include "CachePolicy.h"
#include "PlatformString.h"
#include <wtf/HashSet.h>
class NSURLResponse;
#endif
-namespace WebCore
-{
+namespace WebCore {
class CachedResourceClient;
class Request;
*
* This class also does the actual communication with kio and loads the file.
*/
- class CachedResource
- {
+ class CachedResource {
public:
enum Type {
ImageResource,
}
virtual ~CachedResource();
- virtual void setCharset(const DeprecatedString&) { }
+ virtual void setCharset(const String&) { }
virtual Vector<char>& bufferData(const char* bytes, int addedSize, Request*);
virtual void data(Vector<char>&, bool allDataReceived) = 0;
virtual void error() = 0;
// List of acceptable MIME types separated by ",".
// A MIME type may contain a wildcard, e.g. "text/*".
- DeprecatedString accept() const { return m_accept; }
- void setAccept(const DeprecatedString& accept) { m_accept = accept; }
+ String accept() const { return m_accept; }
+ void setAccept(const String& accept) { m_accept = accept; }
protected:
void setSize(int size);
HashSet<CachedResourceClient*> m_clients;
String m_url;
- DeprecatedString m_accept;
- Request *m_request;
+ String m_accept;
+ Request* m_request;
+
#if __APPLE__
NSURLResponse *m_response;
NSData *m_allData;
#endif
+
Type m_type;
Status m_status;
+
private:
int m_size;
int m_accessCount;
private:
bool allowInLRUList() const { return canDelete() && status() != Persistent; }
- CachedResource *m_nextInLRUList;
- CachedResource *m_prevInLRUList;
+ CachedResource* m_nextInLRUList;
+ CachedResource* m_prevInLRUList;
friend class Cache;
};
namespace WebCore {
-CachedScript::CachedScript(DocLoader* dl, const String &url, CachePolicy cachePolicy, time_t _expireDate, const DeprecatedString& charset)
+CachedScript::CachedScript(DocLoader* dl, const String& url, CachePolicy cachePolicy, time_t _expireDate, const String& charset)
: CachedResource(url, Script, cachePolicy, _expireDate)
- , m_encoding(charset.latin1())
+ , m_encoding(charset)
{
// It's javascript we want.
// But some websites think their scripts are <some wrong mimetype here>
Cache::loader()->load(dl, this, false);
m_loading = true;
if (!m_encoding.isValid())
- m_encoding = TextEncoding(Latin1Encoding);
+ m_encoding = Latin1Encoding();
}
-CachedScript::CachedScript(const String &url, const DeprecatedString &script_data)
- : CachedResource(url, Script, CachePolicyVerify, 0, script_data.length())
- , m_encoding(InvalidEncoding)
+CachedScript::CachedScript(const String& url, const String& scriptData)
+ : CachedResource(url, Script, CachePolicyVerify, 0, scriptData.length())
{
m_errorOccurred = false;
m_loading = false;
m_status = Persistent;
- m_script = String(script_data);
+ m_script = scriptData;
}
CachedScript::~CachedScript()
{
}
// Registers c through the base-class ref(), then, if the script is not currently
// loading, calls c->notifyFinished(this) immediately so the new client does not
// wait for a notification.
-void CachedScript::ref(CachedResourceClient *c)
+void CachedScript::ref(CachedResourceClient* c)
{
CachedResource::ref(c);
-
- if(!m_loading) c->notifyFinished(this);
+ if (!m_loading)
+ c->notifyFinished(this);
}
// Calls Cache::flush(), unregisters c through the base-class deref(), and then
// deletes this object when canDelete() and m_free are both true. Callers must
// not touch the object after this returns, since it may have been destroyed.
-void CachedScript::deref(CachedResourceClient *c)
+void CachedScript::deref(CachedResourceClient* c)
{
Cache::flush();
CachedResource::deref(c);
- if ( canDelete() && m_free )
- delete this;
+ if (canDelete() && m_free)
+ delete this;
}
// Replaces m_encoding with the encoding named by chs, but only when TextEncoding
// reports that name as valid; otherwise the current encoding is kept.
-void CachedScript::setCharset(const DeprecatedString &chs)
+void CachedScript::setCharset(const String& chs)
{
- TextEncoding encoding = TextEncoding(chs.latin1());
+ TextEncoding encoding(chs);
if (encoding.isValid())
m_encoding = encoding;
}
return;
setSize(data.size());
- m_script = String(m_encoding.toUnicode(data.data(), size()));
+ m_script = m_encoding.decode(data.data(), size());
m_loading = false;
checkNotify();
}
return;
CachedResourceClientWalker w(m_clients);
- while (CachedResourceClient *c = w.next())
+ while (CachedResourceClient* c = w.next())
c->notifyFinished(this);
}
#include "CachedResource.h"
#include "TextEncoding.h"
-#include <wtf/Vector.h>
namespace WebCore {
+
class DocLoader;
class CachedScript : public CachedResource {
public:
- CachedScript(DocLoader*, const String& URL, CachePolicy, time_t expireDate, const DeprecatedString& charset);
- CachedScript(const String& URL, const DeprecatedString& scriptData);
+ CachedScript(DocLoader*, const String& URL, CachePolicy, time_t expireDate, const String& charset);
+ CachedScript(const String& URL, const String& scriptData);
virtual ~CachedScript();
const String& script() const { return m_script; }
virtual void ref(CachedResourceClient*);
virtual void deref(CachedResourceClient*);
- virtual void setCharset(const DeprecatedString&);
+ virtual void setCharset(const String&);
virtual void data(Vector<char>&, bool allDataReceived);
virtual void error();
delete this;
}
// Hands the charset name to the decoder, tagged as coming from the HTTP header.
// NOTE(review): the empty-string guard from the old code is gone; confirm that
// Decoder::setEncoding() is a no-op for an empty or unknown name.
-void CachedXBLDocument::setCharset( const DeprecatedString &chs )
+void CachedXBLDocument::setCharset(const String& chs)
{
- if (!chs.isEmpty())
- m_decoder->setEncoding(chs.latin1(), Decoder::EncodingFromHTTPHeader);
+ m_decoder->setEncoding(chs, Decoder::EncodingFromHTTPHeader);
}
void CachedXBLDocument::data(Vector<char>& data, bool )
class CachedResourceClient;
#ifndef KHTML_NO_XBL
- class CachedXBLDocument : public CachedResource
- {
+ class CachedXBLDocument : public CachedResource {
public:
CachedXBLDocument(DocLoader*, const String& url, CachePolicy, time_t expireDate);
virtual ~CachedXBLDocument();
virtual void ref(CachedResourceClient*);
virtual void deref(CachedResourceClient*);
- virtual void setCharset(const DeprecatedString&);
+ virtual void setCharset(const String&);
virtual void data(Vector<char>&, bool allDataReceived);
virtual void error();
delete this;
}
// Hands the charset name to the decoder, tagged as coming from the HTTP header.
// NOTE(review): the empty-string guard from the old code is gone; confirm that
// Decoder::setEncoding() is a no-op for an empty or unknown name.
-void CachedXSLStyleSheet::setCharset( const DeprecatedString &chs )
+void CachedXSLStyleSheet::setCharset(const String& chs)
{
- if (!chs.isEmpty())
- m_decoder->setEncodingName(chs.latin1(), Decoder::EncodingFromHTTPHeader);
+ m_decoder->setEncoding(chs, Decoder::EncodingFromHTTPHeader);
}
void CachedXSLStyleSheet::data(Vector<char>& data, bool allDataReceived)
virtual void ref(CachedResourceClient*);
virtual void deref(CachedResourceClient*);
- virtual void setCharset(const DeprecatedString&);
+ virtual void setCharset(const String&);
virtual void data(Vector<char>&, bool allDataReceived);
virtual void error();
#include "CString.h"
#include "DOMImplementation.h"
+#include "DeprecatedCString.h"
+#include "DeprecatedString.h"
#include "HTMLNames.h"
#include "StreamingTextDecoder.h"
-#include "RegularExpression.h"
-using namespace WebCore;
+namespace WebCore {
+
using namespace HTMLNames;
-class KanjiCode
-{
+class KanjiCode {
public:
enum Type { ASCII, JIS, EUC, SJIS, UTF16, UTF8 };
- static enum Type judge(const char *str, int length);
- static const int ESC;
- static const int _SS2_;
- static const unsigned char kanji_map_sjis[];
+ static enum Type judge(const char* str, int length);
+ static const int ESC = 0x1b;
+ static const unsigned char sjisMap[256];
static int ISkanji(int code)
{
if (code >= 0x100)
- return 0;
- return (kanji_map_sjis[code & 0xff] & 1);
+ return 0;
+ return sjisMap[code & 0xff] & 1;
}
-
static int ISkana(int code)
{
if (code >= 0x100)
- return 0;
- return (kanji_map_sjis[code & 0xff] & 2);
+ return 0;
+ return sjisMap[code & 0xff] & 2;
}
-
};
-const int KanjiCode::ESC = 0x1b;
-const int KanjiCode::_SS2_ = 0x8e;
-
-const unsigned char KanjiCode::kanji_map_sjis[] =
-{
+const unsigned char KanjiCode::sjisMap[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
* Special Thanks to Kenichi Tsuchida
*/
-enum KanjiCode::Type KanjiCode::judge(const char *str, int size)
+enum KanjiCode::Type KanjiCode::judge(const char* str, int size)
{
enum Type code;
int i;
int sjis = 0;
int euc = 0;
- const unsigned char *ptr = (const unsigned char *) str;
+ const unsigned char* ptr = reinterpret_cast<const unsigned char*>(str);
code = ASCII;
return (code);
}
-Decoder::Decoder(const String& mimeType, const String& defaultEncodingName)
- : m_encoding(defaultEncodingName.isNull() ? "iso8859-1" : defaultEncodingName.latin1())
- , m_encodingName(m_encoding.name())
- , m_type(DefaultEncoding)
- , m_reachedBody(false)
- , m_checkedForCSSCharset(false)
- , m_checkedForBOM(false)
+Decoder::ContentType Decoder::determineContentType(const String& mimeType)
{
- if (mimeType == "text/css")
- m_contentType = CSS;
- else if (mimeType == "text/html")
- m_contentType = HTML;
- else if (DOMImplementation::isXMLMIMEType(mimeType)) {
- m_contentType = XML;
- // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we do not assume us-ascii
- // for text/xml, to match Firefox.
- m_encoding = TextEncoding(UTF8Encoding);
- m_encodingName = "UTF-8";
- } else
- m_contentType = PlainText;
-
- if (m_encoding.isValid())
- m_decoder.set(StreamingTextDecoder::create(m_encoding));
- else
- setEncodingName("iso-8859-1", DefaultEncoding);
+ if (equalIgnoringCase(mimeType, "text/css"))
+ return CSS;
+ if (equalIgnoringCase(mimeType, "text/html"))
+ return HTML;
+ if (DOMImplementation::isXMLMIMEType(mimeType))
+ return XML;
+ return PlainText;
}
-Decoder::~Decoder()
+const TextEncoding& Decoder::defaultEncoding(ContentType contentType, const TextEncoding& specifiedDefaultEncoding)
{
+ // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII
+ // for text/xml. This matches Firefox.
+ if (contentType == XML)
+ return UTF8Encoding();
+ if (!specifiedDefaultEncoding.isValid())
+ return Latin1Encoding();
+ return specifiedDefaultEncoding;
}
-void Decoder::setEncodingName(const char* encodingName, EncodingSource type)
+Decoder::Decoder(const String& mimeType, const TextEncoding& specifiedDefaultEncoding)
+ : m_contentType(determineContentType(mimeType))
+ , m_decoder(defaultEncoding(m_contentType, specifiedDefaultEncoding))
+ , m_source(DefaultEncoding)
+ , m_checkedForBOM(false)
+ , m_checkedForCSSCharset(false)
+ , m_checkedForHeadCharset(false)
{
- if (encodingName[0] == '\0')
- return;
-
- bool eightBitOnly = type == EncodingFromMetaTag || type == EncodingFromXMLHeader || type == EncodingFromCSSCharset;
- TextEncoding encoding = TextEncoding(encodingName, eightBitOnly);
-
- // in case the encoding didn't exist, we keep the old one (fixes some sites specifying invalid encodings)
- if (encoding.isValid()) {
- m_encodingName = encoding.name(); // use a standard name for the encoding
- m_encoding = encoding;
- m_type = type;
- m_decoder.set(StreamingTextDecoder::create(m_encoding));
- }
}
-const char* Decoder::encodingName() const
+Decoder::~Decoder()
{
- return m_encodingName;
}
-// Other browsers allow comments in the head section, so we need to also.
-// It's important not to look for tags inside the comments.
-static void skipComment(const char *&ptr, const char *pEnd)
+void Decoder::setEncoding(const TextEncoding& encoding, EncodingSource source)
{
- const char *p = ptr;
- // Allow <!-->; other browsers do.
- if (*p == '>') {
- p++;
- } else {
- while (p != pEnd) {
- if (*p == '-') {
- // This is the real end of comment, "-->".
- if (p[1] == '-' && p[2] == '>') {
- p += 3;
- break;
- }
- // This is the incorrect end of comment that other browsers allow, "--!>".
- if (p[1] == '-' && p[2] == '!' && p[3] == '>') {
- p += 4;
- break;
- }
- }
- p++;
- }
- }
- ptr = p;
+ // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings).
+ if (!encoding.isValid())
+ return;
+
+ if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || source == EncodingFromCSSCharset)
+ m_decoder.reset(encoding.closest8BitEquivalent());
+ else
+ m_decoder.reset(encoding);
+
+ m_source = source;
}
// Returns the position of the encoding string.
return pos != dataEnd;
}
-DeprecatedString Decoder::decode(const char *data, int len)
+void Decoder::checkForBOM(const char* data, size_t len)
{
// Check for UTF-16 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.
- int bufferLength = m_buffer.length();
- const int maximumBOMLength = 3;
- if (!m_checkedForBOM && bufferLength + len >= maximumBOMLength) {
- if (m_type != UserChosenEncoding) {
- // Extract the first three bytes.
- // Handle the case where some of bytes are already in the buffer.
- // The last byte is always guaranteed to not be in the buffer.
- const unsigned char *udata = (const unsigned char *)data;
- unsigned char c1 = bufferLength >= 1 ? m_buffer[0].unicode() : *udata++;
- unsigned char c2 = bufferLength >= 2 ? m_buffer[1].unicode() : *udata++;
- ASSERT(bufferLength < 3);
- unsigned char c3 = *udata;
-
- // Check for the BOM.
- const char *autoDetectedEncoding;
- if ((c1 == 0xFE && c2 == 0xFF) || (c1 == 0xFF && c2 == 0xFE)) {
- autoDetectedEncoding = "ISO-10646-UCS-2";
- } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
- autoDetectedEncoding = "UTF-8";
- } else {
- autoDetectedEncoding = 0;
- }
- // If we found a BOM, use the encoding it implies.
- if (autoDetectedEncoding != 0)
- setEncodingName(autoDetectedEncoding, AutoDetectedEncoding);
- }
+ if (m_source == UserChosenEncoding) {
+ // FIXME: Maybe a BOM should override even a user-chosen encoding.
m_checkedForBOM = true;
+ return;
}
-
- bool currentChunkInBuffer = false;
-
- if (m_type == DefaultEncoding && m_contentType == CSS && !m_checkedForCSSCharset) {
- m_buffer.append(data, len);
- currentChunkInBuffer = true;
-
- if (len > 8) { // strlen("@charset") == 8
- const char* dataStart = m_buffer.latin1();
- const char* dataEnd = dataStart + m_buffer.length();
-
- if (dataStart[0] == '@' && dataStart[1] == 'c' && dataStart[2] == 'h' && dataStart[3] == 'a' && dataStart[4] == 'r' &&
- dataStart[5] == 's' && dataStart[6] == 'e' && dataStart[7] == 't') {
-
- dataStart += 8;
- const char* pos = dataStart;
- if (!skipWhitespace(pos, dataEnd))
- return DeprecatedString::null;
- if (*pos == '"' || *pos == '\'') {
- char quotationMark = *pos;
+ // Check if we have enough data.
+ size_t bufferLength = m_buffer.size();
+ if (bufferLength + len < 3)
+ return;
+
+ m_checkedForBOM = true;
+
+ // Extract the first three bytes.
+ // Handle the case where some of bytes are already in the buffer.
+ // The last byte is always guaranteed to not be in the buffer.
+ const unsigned char* udata = reinterpret_cast<const unsigned char*>(data);
+ unsigned char c1 = bufferLength >= 1 ? m_buffer[0] : *udata++;
+ unsigned char c2 = bufferLength >= 2 ? m_buffer[1] : *udata++;
+ ASSERT(bufferLength < 3);
+ unsigned char c3 = *udata;
+
+ // Check for the BOM.
+ if (c1 == 0xFE && c2 == 0xFF)
+ setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
+ else if (c1 == 0xFF && c2 == 0xFE)
+ setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
+ else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF)
+ setEncoding(UTF8Encoding(), AutoDetectedEncoding);
+}
+
+void Decoder::checkForCSSCharset(const char* data, size_t len)
+{
+ if (m_source != DefaultEncoding) {
+ m_checkedForCSSCharset = true;
+ return;
+ }
+
+ size_t oldSize = m_buffer.size();
+ m_buffer.resize(oldSize + len);
+ memcpy(m_buffer.data() + oldSize, data, len);
+
+ if (m_buffer.size() > 8) { // strlen("@charset") == 8
+ const char* dataStart = m_buffer.data();
+ const char* dataEnd = dataStart + m_buffer.size();
+
+ if (dataStart[0] == '@' && dataStart[1] == 'c' && dataStart[2] == 'h' && dataStart[3] == 'a' && dataStart[4] == 'r' &&
+ dataStart[5] == 's' && dataStart[6] == 'e' && dataStart[7] == 't') {
+
+ dataStart += 8;
+ const char* pos = dataStart;
+ if (!skipWhitespace(pos, dataEnd))
+ return;
+
+ if (*pos == '"' || *pos == '\'') {
+ char quotationMark = *pos;
+ ++pos;
+ dataStart = pos;
+
+ while (pos < dataEnd && *pos != quotationMark)
++pos;
- dataStart = pos;
+ if (pos == dataEnd)
+ return;
+
+ DeprecatedCString encodingName(dataStart, pos - dataStart + 1);
- while (pos < dataEnd && *pos != quotationMark)
- ++pos;
- if (pos == dataEnd)
- return DeprecatedString::null;
+ ++pos;
+ if (!skipWhitespace(pos, dataEnd))
+ return;
- DeprecatedCString encodingName(dataStart, pos - dataStart + 1);
-
- ++pos;
- if (!skipWhitespace(pos, dataEnd))
- return DeprecatedString::null;
+ if (*pos == ';')
+ setEncoding(TextEncoding(encodingName), EncodingFromCSSCharset);
+ }
+ }
+ m_checkedForCSSCharset = true;
+ }
+}
- if (*pos == ';')
- setEncodingName(encodingName, EncodingFromCSSCharset);
+// Other browsers allow comments in the head section, so we need to also.
+// It's important not to look for tags inside the comments.
+//
+// On entry ptr points just past "<!--". On return ptr points just past the
+// comment terminator, or at pEnd when the comment is not terminated within
+// [ptr, pEnd). The range is not required to be NUL-terminated, so every
+// look-ahead is checked against pEnd before it is read.
+static inline void skipComment(const char*& ptr, const char* pEnd)
+{
+    const char* p = ptr;
+    // Allow <!-->; other browsers do.
+    if (p != pEnd && *p == '>') {
+        p++;
+    } else {
+        while (p != pEnd) {
+            if (*p == '-') {
+                // Number of bytes available starting at p (always >= 1 here).
+                const size_t remaining = static_cast<size_t>(pEnd - p);
+                // This is the real end of comment, "-->".
+                if (remaining >= 3 && p[1] == '-' && p[2] == '>') {
+                    p += 3;
+                    break;
+                }
+                // This is the incorrect end of comment that other browsers allow, "--!>".
+                if (remaining >= 4 && p[1] == '-' && p[2] == '!' && p[3] == '>') {
+                    p += 4;
+                    break;
 }
 }
- m_checkedForCSSCharset = true;
+            p++;
 }
- return DeprecatedString::null;
+    }
+    ptr = p;
+}
+
+// Appends the chunk to m_buffer and scans the buffered bytes for an encoding
+// declaration: an XML declaration, a BOM-less UTF-16 XML declaration, or a
+// <meta> tag carrying "charset=".
+//
+// Returns true when decode() may go ahead and decode (an encoding was found,
+// a tag that ends the head section was seen, or the encoding was already
+// known on entry). Returns false when more input is needed; in that case the
+// chunk stays in m_buffer. movedDataToBuffer is set to true whenever the
+// chunk has been appended to m_buffer.
+//
+// m_buffer is a Vector<char> with no terminating NUL, so every look-ahead
+// below is bounded by pEnd.
+bool Decoder::checkForHeadCharset(const char* data, size_t len, bool& movedDataToBuffer)
+{
+    if (m_source != DefaultEncoding) {
+        m_checkedForHeadCharset = true;
+        return true;
+    }
- } else if (m_type == DefaultEncoding && m_contentType != PlainText && !m_reachedBody) { // HTML and XML
- // this is not completely efficient, since the function might go
- // through the html head several times...
+    // This is not completely efficient, since the function might go
+    // through the HTML head several times.
+
+    size_t oldSize = m_buffer.size();
+    m_buffer.resize(oldSize + len);
+    memcpy(m_buffer.data() + oldSize, data, len);
+
+    movedDataToBuffer = true;
- m_buffer.append(data, len);
- currentChunkInBuffer = true;
-
- // we still don't have an encoding, and are in the head
- // the following tags are allowed in <head>:
- // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
-
- // We stop scanning when a tag that is not permitted in <head>
- // is seen, rather when </head> is seen, because that more closely
- // matches behavior in other browsers; more details in
- // <http://bugzilla.opendarwin.org/show_bug.cgi?id=3590>.
-
- // Additionally, we ignore things that looks like tags in <title>; see
- // <http://bugzilla.opendarwin.org/show_bug.cgi?id=4560>.
-
- bool withinTitle = false;
-
- const char *ptr = m_buffer.latin1();
- const char *pEnd = ptr + m_buffer.length();
- while (ptr != pEnd) {
- if (*ptr == '<') {
- bool end = false;
- ptr++;
+    // we still don't have an encoding, and are in the head
+    // the following tags are allowed in <head>:
+    // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
+
+    // We stop scanning when a tag that is not permitted in <head>
+    // is seen, rather than when </head> is seen, because that more closely
+    // matches behavior in other browsers; more details in
+    // <http://bugzilla.opendarwin.org/show_bug.cgi?id=3590>.
+
+    // Additionally, we ignore things that look like tags in <title>; see
+    // <http://bugzilla.opendarwin.org/show_bug.cgi?id=4560>.
+
+    bool withinTitle = false;
+
+    const char* ptr = m_buffer.data();
+    const char* pEnd = ptr + m_buffer.size();
+    // Use '<' rather than '!=' so the loop also stops if a helper ever leaves ptr past pEnd.
+    while (ptr < pEnd) {
+        if (*ptr == '<') {
+            bool end = false;
+            ptr++;
+
+            // A '<' that is the last buffered byte: wait for more data.
+            if (ptr == pEnd)
+                break;
+            // Number of bytes available starting at ptr (always >= 1 here).
+            const size_t remaining = static_cast<size_t>(pEnd - ptr);
+
+            // Handle comments.
+            if (remaining >= 3 && ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') {
+                ptr += 3;
+                skipComment(ptr, pEnd);
+                continue;
+            }
- // Handle comments.
- if (ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') {
- ptr += 3;
- skipComment(ptr, pEnd);
- continue;
- }
-
- // Handle XML declaration, which can have encoding in it.
- // This encoding is honored even for HTML documents.
- if (ptr[0] == '?' && ptr[1] == 'x' && ptr[2] == 'm' && ptr[3] == 'l') {
- const char *end = ptr;
- while (*end != '>' && *end != '\0')
- end++;
- if (*end == '\0')
- break;
- DeprecatedCString str(ptr, end - ptr);
- int len;
- int pos = findXMLEncoding(str, len);
- if (pos != -1)
- setEncodingName(str.mid(pos, len), EncodingFromXMLHeader);
- // continue looking for a charset - it may be specified in an HTTP-Equiv meta
- } else if (ptr[0] == 0 && ptr[1] == '?' && ptr[2] == 0 && ptr[3] == 'x' && ptr[4] == 0 && ptr[5] == 'm' && ptr[6] == 0 && ptr[7] == 'l') {
- // UTF-16 without BOM
- setEncodingName(((ptr - m_buffer.latin1()) % 2) ? "UTF-16LE" : "UTF-16BE", AutoDetectedEncoding);
- goto found;
- }
-
- // the HTTP-EQUIV meta has no effect on XHTML
- if (m_contentType == XML)
- goto found;
+            // Handle XML declaration, which can have encoding in it.
+            // This encoding is honored even for HTML documents.
+            if (remaining >= 4 && ptr[0] == '?' && ptr[1] == 'x' && ptr[2] == 'm' && ptr[3] == 'l') {
+                const char* end = ptr;
+                while (end != pEnd && *end != '>' && *end != '\0')
+                    end++;
+                // Declaration not complete yet (or an embedded NUL): wait for more data.
+                if (end == pEnd || *end == '\0')
+                    break;
+                DeprecatedCString str(ptr, end - ptr);
+                int len;
+                int pos = findXMLEncoding(str, len);
+                if (pos != -1)
+                    setEncoding(TextEncoding(str.mid(pos, len)), EncodingFromXMLHeader);
+                // continue looking for a charset - it may be specified in an HTTP-Equiv meta
+            } else if (remaining >= 8 && ptr[0] == 0 && ptr[1] == '?' && ptr[2] == 0 && ptr[3] == 'x' && ptr[4] == 0 && ptr[5] == 'm' && ptr[6] == 0 && ptr[7] == 'l') {
+                // UTF-16 without BOM
+                setEncoding(((ptr - m_buffer.data()) % 2) ? "UTF-16LE" : "UTF-16BE", AutoDetectedEncoding);
+                return true;
+            }
- if (*ptr == '/') {
- ++ptr;
- end=true;
- }
+            // the HTTP-EQUIV meta has no effect on XHTML
+            if (m_contentType == XML)
+                return true;
- char tmp[20];
- int len = 0;
- while (
- ((*ptr >= 'a') && (*ptr <= 'z') ||
- (*ptr >= 'A') && (*ptr <= 'Z') ||
- (*ptr >= '0') && (*ptr <= '9'))
- && len < 19 )
- {
- tmp[len] = tolower(*ptr);
- ptr++;
- len++;
- }
- tmp[len] = 0;
- AtomicString tag(tmp);
-
- if (tag == titleTag)
- withinTitle = !end;
-
- if (!end && tag == metaTag) {
- const char* end = ptr;
- while (*end != '>' && *end != '\0')
- end++;
- if (*end == '\0')
+            if (*ptr == '/') {
+                ++ptr;
+                end = true;
+            }
+
+            // Collect the lower-cased tag name (at most 19 characters).
+            char tmp[20];
+            int len = 0;
+            while (ptr != pEnd
+                && ((*ptr >= 'a' && *ptr <= 'z')
+                    || (*ptr >= 'A' && *ptr <= 'Z')
+                    || (*ptr >= '0' && *ptr <= '9'))
+                && len < 19) {
+                tmp[len] = tolower(*ptr);
+                ptr++;
+                len++;
+            }
+            tmp[len] = 0;
+
+            // The tag name may continue in the next chunk; wait for more data
+            // instead of classifying a possibly truncated name.
+            if (ptr == pEnd)
+                break;
+
+            AtomicString tag(tmp);
+
+            if (tag == titleTag)
+                withinTitle = !end;
+
+            if (!end && tag == metaTag) {
+                const char* end = ptr;
+                while (end != pEnd && *end != '>' && *end != '\0')
+                    end++;
+                // Tag not complete yet (or an embedded NUL): wait for more data.
+                if (end == pEnd || *end == '\0')
+                    break;
+                DeprecatedCString str(ptr, (end-ptr)+1);
+                str = str.lower();
+                int pos = 0;
+                while (pos < (int)str.length()) {
+                    if ((pos = str.find("charset", pos, false)) == -1)
 break;
- DeprecatedCString str(ptr, (end-ptr)+1);
- str = str.lower();
- int pos = 0;
- while (pos < (int)str.length()) {
- if ((pos = str.find("charset", pos, false)) == -1)
- break;
- pos += 7;
- // skip whitespace..
- while (pos < (int)str.length() && str[pos] <= ' ')
- pos++;
- if (pos == (int)str.length())
- break;
- if (str[pos++] != '=')
- continue;
- while (pos < (int)str.length() &&
- (str[pos] <= ' ') || str[pos] == '=' || str[pos] == '"' || str[pos] == '\'')
- pos++;
-
- // end ?
- if (pos == (int)str.length())
- break;
- unsigned endpos = pos;
- while (endpos < str.length() &&
- str[endpos] != ' ' && str[endpos] != '"' && str[endpos] != '\'' &&
- str[endpos] != ';' && str[endpos] != '>')
- endpos++;
- setEncodingName(str.mid(pos, endpos-pos), EncodingFromMetaTag);
- if (m_type == EncodingFromMetaTag)
- goto found;
-
- if (endpos >= str.length() || str[endpos] == '/' || str[endpos] == '>')
- break;
-
- pos = endpos + 1;
- }
- } else if (tag != scriptTag && tag != noscriptTag && tag != styleTag &&
- tag != linkTag && tag != metaTag && tag != objectTag &&
- tag != titleTag && tag != baseTag &&
- (end || tag != htmlTag) && !withinTitle &&
- (tag != headTag) && isalpha(tmp[0])) {
- m_reachedBody = true;
- goto found;
+                    pos += 7;
+                    // skip whitespace..
+                    while (pos < (int)str.length() && str[pos] <= ' ')
+                        pos++;
+                    if (pos == (int)str.length())
+                        break;
+                    if (str[pos++] != '=')
+                        continue;
+                    // Skip separators before the value. The parentheses keep the
+                    // length check in force for every alternative ('&&' binds
+                    // tighter than '||').
+                    while (pos < (int)str.length() &&
+                        ((str[pos] <= ' ') || str[pos] == '=' || str[pos] == '"' || str[pos] == '\''))
+                        pos++;
+
+                    // end ?
+                    if (pos == (int)str.length())
+                        break;
+                    unsigned endpos = pos;
+                    while (endpos < str.length() &&
+                        str[endpos] != ' ' && str[endpos] != '"' && str[endpos] != '\'' &&
+                        str[endpos] != ';' && str[endpos] != '>')
+                        endpos++;
+                    setEncoding(TextEncoding(str.mid(pos, endpos - pos)), EncodingFromMetaTag);
+                    // setEncoding() ignores invalid encodings, so check that it took effect.
+                    if (m_source == EncodingFromMetaTag)
+                        return true;
+
+                    if (endpos >= str.length() || str[endpos] == '/' || str[endpos] == '>')
+                        break;
+
+                    pos = endpos + 1;
 }
+            } else if (tag != scriptTag && tag != noscriptTag && tag != styleTag &&
+                tag != linkTag && tag != metaTag && tag != objectTag &&
+                tag != titleTag && tag != baseTag &&
+                (end || tag != htmlTag) && !withinTitle &&
+                (tag != headTag) && isalpha(tmp[0])) {
+                m_checkedForHeadCharset = true;
+                return true;
 }
- else
- ptr++;
 }
- return DeprecatedString::null;
+        else
+            ptr++;
 }
+    return false;
+}
- found:
- // Do the auto-detect if our default encoding is one of the Japanese ones.
- if (m_type != UserChosenEncoding && m_type != AutoDetectedEncoding && m_encoding.isJapanese()) {
- const char *autoDetectedEncoding;
- switch (KanjiCode::judge(data, len)) {
- case KanjiCode::JIS:
- autoDetectedEncoding = "jis7";
- break;
- case KanjiCode::EUC:
- autoDetectedEncoding = "eucjp";
- break;
- case KanjiCode::SJIS:
- autoDetectedEncoding = "sjis";
- break;
- default:
- autoDetectedEncoding = NULL;
- break;
- }
- if (autoDetectedEncoding)
- setEncodingName(autoDetectedEncoding, AutoDetectedEncoding);
+void Decoder::detectJapaneseEncoding(const char* data, size_t len)
+{
+ switch (KanjiCode::judge(data, len)) {
+ case KanjiCode::JIS:
+ setEncoding("ISO-2022-JP", AutoDetectedEncoding);
+ break;
+ case KanjiCode::EUC:
+ setEncoding("EUC-JP", AutoDetectedEncoding);
+ break;
+ case KanjiCode::SJIS:
+ setEncoding("Shift_JIS", AutoDetectedEncoding);
+ break;
+ case KanjiCode::ASCII:
+ case KanjiCode::UTF16:
+ case KanjiCode::UTF8:
+ break;
+ }
+}
+
+// Decodes one chunk of raw bytes using the current encoding, first running
+// the BOM / CSS @charset / head-charset checks that may still change the
+// encoding. Returns an empty string while input is being held back in
+// m_buffer for those checks; held-back bytes are decoded by a later
+// decode() call or by flush().
+String Decoder::decode(const char* data, size_t len)
+{
+    if (!m_checkedForBOM)
+        checkForBOM(data, len);
+
+    if (m_contentType == CSS && !m_checkedForCSSCharset) {
+        if (m_source == DefaultEncoding) {
+            // checkForCSSCharset() appends this chunk to m_buffer, so nothing
+            // is lost by returning an empty string here.
+            checkForCSSCharset(data, len);
+            return "";
+        }
+        // The encoding is already known (HTTP header, <link charset>, BOM, ...).
+        // checkForCSSCharset() would return without buffering the chunk in this
+        // case, so returning "" would silently drop it. Mark the check as done
+        // and fall through to decode the chunk.
+        m_checkedForCSSCharset = true;
+    }
+
+    bool movedDataToBuffer = false;
+
+    if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForHeadCharset) { // HTML and XML
+        if (!checkForHeadCharset(data, len, movedDataToBuffer))
+            return "";
 }
- ASSERT(m_encoding.isValid());
+    // Do the auto-detect if our default encoding is one of the Japanese ones.
+    // FIXME: It seems wrong to change our encoding downstream after we have already done some decoding.
+    if (m_source != UserChosenEncoding && m_source != AutoDetectedEncoding && encoding().isJapanese())
+        detectJapaneseEncoding(data, len);
+
+    ASSERT(encoding().isValid());
- DeprecatedString out;
+    // Nothing was held back earlier: decode the chunk directly.
+    if (m_buffer.isEmpty())
+        return m_decoder.decode(data, len);
- if (!m_buffer.isEmpty()) {
- if (!currentChunkInBuffer)
- m_buffer.append(data, len);
- out = m_decoder->toUnicode(m_buffer.latin1(), m_buffer.length());
- m_buffer.truncate(0);
- } else
- out = m_decoder->toUnicode(data, len);
+    // Earlier bytes are pending; append this chunk unless a check above already did.
+    if (!movedDataToBuffer) {
+        size_t oldSize = m_buffer.size();
+        m_buffer.resize(oldSize + len);
+        memcpy(m_buffer.data() + oldSize, data, len);
+    }
- return out;
+    String result = m_decoder.decode(m_buffer.data(), m_buffer.size());
+    m_buffer.resize(0);
+    return result;
 }
-DeprecatedString Decoder::flush() const
+String Decoder::flush()
{
- return m_decoder->toUnicode(m_buffer.latin1(), m_buffer.length(), true);
+ String result = m_decoder.decode(m_buffer.data(), m_buffer.size(), true);
+ m_buffer.resize(0);
+ return result;
}
-// -----------------------------------------------------------------------------
+}
This file is part of the KDE libraries
Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
+ Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
+ Copyright (C) 2006 Apple Computer, Inc.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
Boston, MA 02111-1307, USA.
*/
+
#ifndef Decoder_h
#define Decoder_h
-#include <wtf/OwnPtr.h>
-#include "TextEncoding.h"
#include "PlatformString.h"
+#include "Shared.h"
+#include "TextDecoder.h"
+#include <wtf/Vector.h>
namespace WebCore {
- class StreamingTextDecoder;
-
-/**
- * @internal
- */
-class Decoder : public Shared<Decoder>
-{
+class Decoder : public Shared<Decoder> {
public:
enum EncodingSource {
DefaultEncoding,
EncodingFromHTTPHeader,
UserChosenEncoding
};
-
- Decoder(const String& mimeType, const String& defaultEncodingName = String());
- ~Decoder();
- void setEncodingName(const char* encoding, EncodingSource type);
- const char* encodingName() const;
+ Decoder(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding());
+ ~Decoder();
- bool visuallyOrdered() const { return m_encoding.usesVisualOrdering(); }
- const TextEncoding& encoding() const { return m_encoding; }
+ void setEncoding(const TextEncoding&, EncodingSource);
+ const TextEncoding& encoding() const { return m_decoder.encoding(); }
- DeprecatedString decode(const char* data, int len);
- DeprecatedString flush() const;
+ String decode(const char* data, size_t length);
+ String flush();
private:
- enum ContentType {
- HTML,
- XML,
- CSS,
- PlainText // Do not look inside the document (equivalent to directly using StreamingTextDecoder)
- };
+ enum ContentType { PlainText, HTML, XML, CSS }; // PlainText is equivalent to directly using TextDecoder.
+ static ContentType determineContentType(const String& mimeType);
+ static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding);
- // encoding used for decoding. default is Latin1.
- TextEncoding m_encoding;
- ContentType m_contentType;
- OwnPtr<StreamingTextDecoder> m_decoder;
- DeprecatedCString m_encodingName;
- EncodingSource m_type;
+ void checkForBOM(const char*, size_t);
+ void checkForCSSCharset(const char*, size_t);
+ bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer);
+ void detectJapaneseEncoding(const char*, size_t);
- // Our version of DeprecatedString works well for all-8-bit characters, and allows null characters.
- // This works better than DeprecatedCString when there are null characters involved.
- DeprecatedString m_buffer;
-
- bool m_reachedBody;
- bool m_checkedForCSSCharset;
+ ContentType m_contentType;
+ TextDecoder m_decoder;
+ EncodingSource m_source;
+ Vector<char> m_buffer;
bool m_checkedForBOM;
+ bool m_checkedForCSSCharset;
+ bool m_checkedForHeadCharset;
};
}
return cachedObject;
}
-CachedCSSStyleSheet *DocLoader::requestStyleSheet(const String& url, const DeprecatedString& charset)
+CachedCSSStyleSheet *DocLoader::requestStyleSheet(const String& url, const String& charset)
{
KURL fullURL = m_doc->completeURL(url.deprecatedString());
bool reload = needReload(fullURL);
- CachedCSSStyleSheet *cachedObject = Cache::requestStyleSheet(this, url, reload, m_expireDate, charset);
+ CachedCSSStyleSheet* cachedObject = Cache::requestStyleSheet(this, url, reload, m_expireDate, charset);
CheckCacheObjectStatus(this, cachedObject);
return cachedObject;
}
-CachedScript *DocLoader::requestScript(const String& url, const DeprecatedString& charset)
+CachedScript* DocLoader::requestScript(const String& url, const String& charset)
{
KURL fullURL = m_doc->completeURL(url.deprecatedString());
~DocLoader();
CachedImage* requestImage(const String& url);
- CachedCSSStyleSheet* requestStyleSheet(const String& url, const DeprecatedString& charset);
- CachedScript* requestScript(const String& url, const DeprecatedString& charset);
+ CachedCSSStyleSheet* requestStyleSheet(const String& url, const String& charset);
+ CachedScript* requestScript(const String& url, const String& charset);
#ifdef KHTML_XSLT
CachedXSLStyleSheet* requestXSLStyleSheet(const String& url);
/* This file is part of the KDE project
*
- * Copyright (C) 2004 Apple Computer, Inc.
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
#include "config.h"
#include "FormData.h"
-#include <wtf/Vector.h>
+#include "CString.h"
+#include "TextEncoding.h"
namespace WebCore {
{
}
-FormData::FormData(const DeprecatedCString &s)
+FormData::FormData(const CString& s)
{
appendData(s.data(), s.length());
}
-void FormData::appendData(const void *data, size_t size)
+void FormData::appendData(const void* data, size_t size)
{
- if (m_elements.isEmpty() || m_elements.last().m_type != FormDataElement::data) {
+ if (m_elements.isEmpty() || m_elements.last().m_type != FormDataElement::data)
m_elements.append(FormDataElement());
- }
- FormDataElement &e = m_elements.last();
+ FormDataElement& e = m_elements.last();
size_t oldSize = e.m_data.size();
e.m_data.resize(oldSize + size);
memcpy(e.m_data.data() + oldSize, data, size);
}
-void FormData::appendFile(const DeprecatedString &filename)
+void FormData::appendFile(const String& filename)
{
m_elements.append(filename);
}
-Vector<char> FormData::flatten() const
+void FormData::flatten(Vector<char>& a) const
{
// Concatenate all the byte arrays, but omit any files.
- Vector<char> a;
+ a.clear();
+ size_t size = a.size();
for (DeprecatedValueListConstIterator<FormDataElement> it = m_elements.begin(); it != m_elements.end(); ++it) {
const FormDataElement& e = *it;
if (e.m_type == FormDataElement::data) {
- if (a.isEmpty())
- a = e.m_data;
- else {
- size_t oldSize = a.size();
- size_t delta = e.m_data.size();
- a.resize(oldSize + delta);
- memcpy(a.data() + oldSize, e.m_data.data(), delta);
- }
+ size_t delta = e.m_data.size();
+ a.resize(size + delta);
+ memcpy(a.data() + size, e.m_data.data(), delta);
+ size += delta;
}
}
- return a;
}
-DeprecatedString FormData::flattenToString() const
+String FormData::flattenToString() const
{
- Vector<char> bytes = flatten();
- return DeprecatedString::fromLatin1(bytes.data(), bytes.size());
+ Vector<char> bytes;
+ flatten(bytes);
+ return Latin1Encoding().decode(bytes.data(), bytes.size());
}
} // namespace WebCore
#ifndef FormData_h
#define FormData_h
-#include "DeprecatedString.h"
#include "DeprecatedValueList.h"
+#include "PlatformString.h"
#include <wtf/Vector.h>
namespace WebCore {
public:
FormDataElement() : m_type(data) { }
FormDataElement(const Vector<char>& array) : m_type(data), m_data(array) { }
- FormDataElement(const DeprecatedString& filename) : m_type(encodedFile), m_filename(filename) { }
+ FormDataElement(const String& filename) : m_type(encodedFile), m_filename(filename) { }
enum { data, encodedFile } m_type;
Vector<char> m_data;
- DeprecatedString m_filename;
+ String m_filename;
};
class FormData {
public:
FormData();
- FormData(const DeprecatedCString&);
+ FormData(const CString&);
- void appendData(const void *data, size_t size);
- void appendFile(const DeprecatedString& filename);
+ void appendData(const void* data, size_t);
+ void appendFile(const String& filename);
- Vector<char> flatten() const; // omits files
- DeprecatedString flattenToString() const; // omits files
+ void flatten(Vector<char>&) const; // omits files
+ String flattenToString() const; // omits files
size_t count() const { return m_elements.count(); }
DeprecatedValueListConstIterator<FormDataElement> begin() const { return m_elements.begin(); }
private:
virtual void setStyleSheet(const String& /*URL*/, const String& sheet)
{
- m_frame->setUserStyleSheet(sheet.deprecatedString());
+ m_frame->setUserStyleSheet(sheet);
}
Frame* m_frame;
CachedCSSStyleSheet* m_cachedSheet;
return d->m_jscript;
}
-static bool getString(JSValue* result, DeprecatedString& string)
+static bool getString(JSValue* result, String& string)
{
if (!result)
return false;
void Frame::replaceContentsWithScriptResult(const KURL& url)
{
JSValue* ret = executeScript(0, KURL::decode_string(url.url().mid(strlen("javascript:"))));
- DeprecatedString scriptResult;
+ String scriptResult;
if (getString(ret, scriptResult)) {
begin();
write(scriptResult);
}
}
-JSValue* Frame::executeScript(Node* n, const DeprecatedString& script, bool forceUserGesture)
+JSValue* Frame::executeScript(Node* n, const String& script, bool forceUserGesture)
{
KJSProxy *proxy = jScript();
d->m_scheduledRedirection = noRedirectionScheduled;
d->m_delayRedirect = 0;
d->m_redirectURL = DeprecatedString::null;
- d->m_redirectReferrer = DeprecatedString::null;
+ d->m_redirectReferrer = String();
d->m_redirectLockHistory = true;
d->m_redirectUserGesture = false;
d->m_bHTTPRefresh = false;
d->m_bMousePressed = false;
if (!d->m_haveEncoding)
- d->m_encoding = DeprecatedString::null;
+ d->m_encoding = String();
}
Document *Frame::document() const
}
if (!d->m_decoder) {
- d->m_decoder = new Decoder(d->m_request.m_responseMIMEType, settings()->encoding().latin1());
+ d->m_decoder = new Decoder(d->m_request.m_responseMIMEType, settings()->encoding());
if (!d->m_encoding.isNull())
- d->m_decoder->setEncodingName(d->m_encoding.latin1(),
+ d->m_decoder->setEncoding(d->m_encoding,
d->m_haveEncoding ? Decoder::UserChosenEncoding : Decoder::EncodingFromHTTPHeader);
-
if (d->m_doc)
d->m_doc->setDecoder(d->m_decoder.get());
}
- DeprecatedString decoded = d->m_decoder->decode(str, len);
- if (decoded.isEmpty())
- return;
+ String decoded = d->m_decoder->decode(str, len);
- if (d->m_bFirstData) {
- // determine the parse mode
- d->m_doc->determineParseMode(decoded);
- d->m_bFirstData = false;
+ if (decoded.isEmpty())
+ return;
- // ### this is still quite hacky, but should work a lot better than the old solution
- if (d->m_decoder->visuallyOrdered()) d->m_doc->setVisuallyOrdered();
- d->m_doc->recalcStyle(Node::Force);
- }
+ if (d->m_bFirstData) {
+ d->m_bFirstData = false;
+ d->m_doc->determineParseMode(decoded);
+ if (d->m_decoder->encoding().usesVisualOrdering())
+ d->m_doc->setVisuallyOrdered();
+ d->m_doc->recalcStyle(Node::Force);
+ }
- if (Tokenizer* t = d->m_doc->tokenizer()) {
- ASSERT(!t->wantsRawData());
- t->write(decoded, true);
- }
+ if (Tokenizer* t = d->m_doc->tokenizer()) {
+ ASSERT(!t->wantsRawData());
+ t->write(decoded, true);
+ }
}
-void Frame::write(const DeprecatedString& str)
+void Frame::write(const String& str)
{
- if (str.isNull())
- return;
+ if (str.isNull())
+ return;
- if (d->m_bFirstData) {
- // determine the parse mode
- d->m_doc->setParseMode(Document::Strict);
- d->m_bFirstData = false;
- }
- Tokenizer* t = d->m_doc->tokenizer();
- if (t)
- t->write(str, true);
+ if (d->m_bFirstData) {
+ // determine the parse mode
+ d->m_doc->setParseMode(Document::Strict);
+ d->m_bFirstData = false;
+ }
+ Tokenizer* t = d->m_doc->tokenizer();
+ if (t)
+ t->write(str, true);
}
void Frame::end()
// make sure nothing's left in there...
if (d->m_doc) {
if (d->m_decoder) {
- DeprecatedString decoded = d->m_decoder->flush();
+ String decoded = d->m_decoder->flush();
if (d->m_bFirstData) {
- d->m_doc->determineParseMode(decoded);
d->m_bFirstData = false;
+ d->m_doc->determineParseMode(decoded);
}
write(decoded);
}
+// Returns the base target of the frame's document, or a default-constructed
+// String when the frame has no document.
String Frame::baseTarget() const
{
if (!d->m_doc)
- return DeprecatedString();
+ return String();
return d->m_doc->baseTarget();
}
d->m_scheduledRedirection = redirectionScheduled;
d->m_delayRedirect = delay;
d->m_redirectURL = url;
- d->m_redirectReferrer = DeprecatedString::null;
+ d->m_redirectReferrer = String();
d->m_redirectLockHistory = doLockHistory;
d->m_redirectUserGesture = false;
}
}
-void Frame::scheduleLocationChange(const DeprecatedString& url, const DeprecatedString& referrer, bool lockHistory, bool userGesture)
+void Frame::scheduleLocationChange(const DeprecatedString& url, const String& referrer, bool lockHistory, bool userGesture)
{
KURL u(url);
d->m_scheduledRedirection = historyNavigationScheduled;
d->m_delayRedirect = 0;
d->m_redirectURL = DeprecatedString::null;
- d->m_redirectReferrer = DeprecatedString::null;
+ d->m_redirectReferrer = String();
d->m_scheduledHistoryNavigationSteps = steps;
stopRedirectionTimer();
if (d->m_bComplete)
}
}
-void Frame::changeLocation(const DeprecatedString& URL, const DeprecatedString& referrer, bool lockHistory, bool userGesture)
+void Frame::changeLocation(const DeprecatedString& URL, const String& referrer, bool lockHistory, bool userGesture)
{
if (URL.find("javascript:", 0, false) == 0) {
- DeprecatedString script = KURL::decode_string(URL.mid(11));
+ String script = KURL::decode_string(URL.mid(11));
JSValue* result = executeScript(0, script, userGesture);
- DeprecatedString scriptResult;
+ String scriptResult;
if (getString(result, scriptResult)) {
begin(url());
write(scriptResult);
}
DeprecatedString URL = d->m_redirectURL;
- DeprecatedString referrer = d->m_redirectReferrer;
+ String referrer = d->m_redirectReferrer;
bool lockHistory = d->m_redirectLockHistory;
bool userGesture = d->m_redirectUserGesture;
d->m_scheduledRedirection = noRedirectionScheduled;
d->m_delayRedirect = 0;
d->m_redirectURL = DeprecatedString::null;
- d->m_redirectReferrer = DeprecatedString::null;
+ d->m_redirectReferrer = String();
changeLocation(URL, referrer, lockHistory, userGesture);
}
-DeprecatedString Frame::encoding() const
+// Returns the name of the encoding in effect for this frame. Checked in order:
+// m_encoding when m_haveEncoding is set and the name is non-empty, then the
+// name of the decoder's encoding if a decoder exists and its encoding is
+// valid, and finally the encoding from settings().
+String Frame::encoding() const
{
if (d->m_haveEncoding && !d->m_encoding.isEmpty())
return d->m_encoding;
-
if (d->m_decoder && d->m_decoder->encoding().isValid())
- return d->m_decoder->encodingName();
-
+ return d->m_decoder->encoding().name();
return settings()->encoding();
}
DeprecatedString bodyEnc;
if (contentType.lower() == "multipart/form-data")
// FIXME: is this correct? I suspect not
- bodyEnc = KURL::encode_string(formData.flattenToString());
+ bodyEnc = KURL::encode_string(formData.flattenToString().deprecatedString());
else if (contentType.lower() == "text/plain") {
// Convention seems to be to decode, and s/&/\n/
- DeprecatedString tmpbody = formData.flattenToString();
+ DeprecatedString tmpbody = formData.flattenToString().deprecatedString();
tmpbody.replace('&', '\n');
tmpbody.replace('+', ' ');
tmpbody = KURL::decode_string(tmpbody); // Decode the rest of it
bodyEnc = KURL::encode_string(tmpbody); // Recode for the URL
} else
- bodyEnc = KURL::encode_string(formData.flattenToString());
+ bodyEnc = KURL::encode_string(formData.flattenToString().deprecatedString());
nvps.append(String::sprintf("body=%s", bodyEnc.latin1()).deprecatedString());
q = nvps.join("&");
if (strcmp(action, "get") == 0) {
if (u.protocol() != "mailto")
- u.setQuery(formData.flattenToString());
+ u.setQuery(formData.flattenToString().deprecatedString());
request.setDoPost(false);
} else {
request.postData = formData;
return d->m_kjsDefaultStatusBarText;
}
-DeprecatedString Frame::referrer() const
+// Returns the referrer stored for this frame (d->m_referrer).
+String Frame::referrer() const
{
return d->m_referrer;
}
d->m_typingStyle = 0;
}
-JSValue* Frame::executeScript(const String& filename, int baseLine, Node* n, const DeprecatedString& script)
+// Evaluates |script| through the frame's KJSProxy, passing along the filename,
+// base line and node, then calls Document::updateDocumentsRendering().
+// Returns 0 without evaluating anything when jScript() yields no proxy.
+JSValue* Frame::executeScript(const String& filename, int baseLine, Node* n, const String& script)
{
- // FIXME: This is missing stuff that the other executeScript has.
- // --> d->m_runningScripts and submitFormAgain.
- // Why is that OK?
- KJSProxy *proxy = jScript();
- if (!proxy)
- return 0;
- JSValue* ret = proxy->evaluate(filename, baseLine, script, n);
- Document::updateDocumentsRendering();
- return ret;
+ // FIXME: This is missing stuff that the other executeScript has.
+ // --> d->m_runningScripts and submitFormAgain.
+ // Why is that OK?
+ KJSProxy* proxy = jScript();
+ if (!proxy)
+ return 0;
+ JSValue* ret = proxy->evaluate(filename, baseLine, script, n);
+ Document::updateDocumentsRendering();
+ return ret;
}
Frame *Frame::opener()
{
}
-bool Frame::isCharacterSmartReplaceExempt(const DeprecatedChar&, bool)
+bool Frame::isCharacterSmartReplaceExempt(UChar, bool)
{
// no smart replace
return true;
return start ? scanForForm(start) : 0;
}
-void Frame::setEncoding(const DeprecatedString& name, bool userChosen)
+void Frame::setEncoding(const String& name, bool userChosen)
{
if (!d->m_workingURL.isEmpty())
receivedFirstData();
/**
* Execute the specified snippet of JavaScript code.
*/
- KJS::JSValue* executeScript(Node*, const DeprecatedString& script, bool forceUserGesture = false);
+ KJS::JSValue* executeScript(Node*, const String& script, bool forceUserGesture = false);
/**
* Implementation of CSS property -webkit-user-drag == auto
* Make a location change, or schedule one for later.
* These are used for JavaScript-triggered location changes.
*/
- void changeLocation(const DeprecatedString& URL, const DeprecatedString& referrer, bool lockHistory = true, bool userGesture = false);
- void scheduleLocationChange(const DeprecatedString& url, const DeprecatedString& referrer, bool lockHistory = true, bool userGesture = false);
+ void changeLocation(const DeprecatedString& URL, const String& referrer, bool lockHistory = true, bool userGesture = false);
+ void scheduleLocationChange(const DeprecatedString& url, const String& referrer, bool lockHistory = true, bool userGesture = false);
void scheduleRefresh(bool userGesture = false);
bool isScheduledLocationChangePending() const;
* this function many times in sequence. But remember: The fewer calls
* you make, the faster the widget will be.
*
- * The HTML code is send through a decoder which decodes the stream to
+ * The HTML code is sent through a decoder which decodes the stream to
* Unicode.
*
* The @p len parameter is needed for streams encoded in utf-16,
* parameter.
*
* Attention: Don't mix calls to @ref write(const char*) with calls
- * to @ref write(const DeprecatedString& ).
+ * to @ref write(const String&).
*
* The result might not be what you want.
*/
* this function many times in sequence. But remember: The fewer calls
* you make, the faster the widget will be.
*/
- virtual void write(const DeprecatedString& str);
+ virtual void write(const String&);
/**
* Call this after your last call to @ref write().
void paint(GraphicsContext*, const IntRect&);
- void setEncoding(const DeprecatedString& encoding, bool userChosen);
-
- /**
- * Returns the encoding the page currently uses.
- *
- * Note that the encoding might be different from the charset.
- */
- DeprecatedString encoding() const;
+ void setEncoding(const String& encoding, bool userChosen);
+ String encoding() const;
/**
* Sets a user defined style sheet to be used on top of the HTML4,
/**
* Referrer used for links in this page.
*/
- DeprecatedString referrer() const;
+ String referrer() const;
/**
* Last-modified date (in raw string format), if received in the [HTTP] headers.
void applyEditingStyleToElement(Element*) const;
void removeEditingStyleFromElement(Element*) const;
virtual void print() = 0;
- virtual bool isCharacterSmartReplaceExempt(const DeprecatedChar&, bool);
+ virtual bool isCharacterSmartReplaceExempt(UChar, bool);
// Used to keep the part alive when running a script that might destroy it.
void keepAlive();
void cancelRedirection(bool newLoadInProgress = false);
public:
- KJS::JSValue* executeScript(const String& filename, int baseLine, Node*, const DeprecatedString& script);
+ KJS::JSValue* executeScript(const String& filename, int baseLine, Node*, const String& script);
KJSProxy* jScript();
Frame* opener();
void setOpener(Frame* _opener);
BrowserExtension* m_extension;
RefPtr<Document> m_doc;
RefPtr<Decoder> m_decoder;
- DeprecatedString m_encoding;
- DeprecatedString scheduledScript;
+ String m_encoding;
+ String scheduledScript;
RefPtr<Node> scheduledScriptNode;
KJSProxy* m_jscript;
RedirectionScheduled m_scheduledRedirection;
double m_delayRedirect;
DeprecatedString m_redirectURL;
- DeprecatedString m_redirectReferrer;
+ String m_redirectReferrer;
int m_scheduledHistoryNavigationSteps;
int m_zoomFactor;
- DeprecatedString m_referrer;
+ String m_referrer;
struct SubmitForm {
const char* submitAction;
bool isPluginsEnabled() const { return m_pluginsEnabled; }
bool privateBrowsingEnabled() const { return m_privateBrowsingEnabled; }
- const DeprecatedString& encoding() const { return m_encoding; }
+ const String& encoding() const { return m_encoding; }
KURL userStyleSheetLocation() const { return m_userStyleSheetLocation; }
bool shouldPrintBackgrounds() const { return m_shouldPrintBackgrounds; }
void setPrivateBrowsingEnabled(bool f) { m_privateBrowsingEnabled = f; }
void setJavaScriptCanOpenWindowsAutomatically(bool f) { m_javaScriptCanOpenWindowsAutomatically = f; }
- void setEncoding(const DeprecatedString& s) { m_encoding = s; }
+ void setEncoding(const String& s) { m_encoding = s; }
void setUserStyleSheetLocation(const KURL& s) { m_userStyleSheetLocation = s; }
void setShouldPrintBackgrounds(bool f) { m_shouldPrintBackgrounds = f; }
AtomicString m_sansSerifFontName;
AtomicString m_cursiveFontName;
AtomicString m_fantasyFontName;
- DeprecatedString m_encoding; // FIXME: TextEncoding takes a latin1 string, which String & AtomicString don't easily produce
+ String m_encoding;
KURL m_userStyleSheetLocation;
int m_minimumFontSize;
return DeprecatedCString(data(), length() + 1);
}
+// Creates a CString backed by a fresh CStringBuffer of length + 1 bytes and
+// sets characterBuffer to the start of that buffer so the caller can fill in
+// the first |length| bytes. This function itself writes only the terminating
+// NUL at index |length|.
+// NOTE(review): CStringBuffer's constructor takes an unsigned, so length + 1
+// is narrowed where size_t is wider than unsigned -- confirm callers never
+// pass lengths that large.
+CString CString::newUninitialized(size_t length, char*& characterBuffer)
+{
+ CString result;
+ result.m_buffer = new CStringBuffer(length + 1);
+ char* bytes = result.m_buffer->data();
+ bytes[length] = '\0';
+ characterBuffer = bytes;
+ return result;
+}
+
}
#ifndef CString_h
#define CString_h
-#include <wtf/Vector.h>
#include "Shared.h"
+#include <wtf/Vector.h>
namespace WebCore {
+
class DeprecatedCString;
+// Character storage for CString, which holds it through a RefPtr.
class CStringBuffer : public Shared<CStringBuffer> {
public:
CStringBuffer(unsigned length) : m_vector(length) { }
-
+
char* data() { return m_vector.data(); }
+ // Number of bytes in the buffer, i.e. the size of the underlying vector.
unsigned length() const { return m_vector.size(); }
+
private:
Vector<char> m_vector;
};
-
+
class CString {
public:
CString() { }
- CString(const char* str);
- CString(const char* str, unsigned length);
-
+ CString(const char*);
+ CString(const char*, unsigned length);
+ // Returns a CString with room for |length| characters and sets
+ // characterBuffer to its writable storage for the caller to fill in.
+ static CString newUninitialized(size_t length, char*& characterBuffer);
+
const char* data() const;
unsigned length() const;
-
+
operator const char*() const { return data(); }
-
+
+ // True when no buffer is attached, as for a default-constructed CString.
bool isNull() const { return !m_buffer; }
-
- CString(const DeprecatedCString& str);
+
+ CString(const DeprecatedCString&);
DeprecatedCString deprecatedCString() const;
+
private:
- void init(const char*str, unsigned length);
+ void init(const char*, unsigned length);
RefPtr<CStringBuffer> m_buffer;
};
-
+
}
#endif // CString_h
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "TextEncoding.h"
-
-#if __APPLE__
-#include "ExtraCFEncodings.h"
-#endif
-
namespace WebCore {
+ #define kTextEncodingISOLatinThai kCFStringEncodingISOLatinThai
+
+// One row of CharsetTable: a charset name paired with a global-namespace
+// ::TextEncoding value.
struct CharsetEntry {
const char* name;
- TextEncodingID encoding;
- int flags; // actually TextEncodingFlags
+ ::TextEncoding encoding;
};
extern const CharsetEntry CharsetTable[];
+++ /dev/null
-/*
- * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
- * Copyright (C) 2005 Alexey Proskuryakov <ap@nypop.com>.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "CharsetNames.h"
-
-#include "CharsetData.h"
-#include <wtf/Assertions.h>
-#include <wtf/HashMap.h>
-#include <unicode/ucnv.h>
-
-using namespace WTF;
-
-namespace WebCore {
-
-struct TextEncodingIDHashTraits : GenericHashTraits<TextEncodingID> {
- static const bool emptyValueIsZero = false;
- static TraitType emptyValue() { return InvalidEncoding; }
- static TraitType deletedValue() { return InvalidEncoding2; }
-};
-
-// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
-// or anything like that.
-const unsigned PHI = 0x9e3779b9U;
-
-// Hash for all-ASCII strings that does case folding and skips any characters
-// that are not alphanumeric. If passed any non-ASCII characters, depends on
-// the behavior of isalnum -- if that returns false as it does on OS X, then
-// it will properly skip those characters too.
-struct EncodingNameHash {
-
- static bool equal(const char* s1, const char* s2)
- {
- char c1;
- char c2;
-
- do {
- do
- c1 = *s1++;
- while (c1 && !isalnum(c1));
- do
- c2 = *s2++;
- while (c2 && !isalnum(c2));
- if (tolower(c1) != tolower(c2))
- return false;
- } while (c1 && c2);
-
- return !c1 && !c2;
- }
-
- // This algorithm is the one-at-a-time hash from:
- // http://burtleburtle.net/bob/hash/hashfaq.html
- // http://burtleburtle.net/bob/hash/doobs.html
- static unsigned hash(const char* s)
- {
- unsigned h = PHI;
-
- for (int i = 0; i != 16; ++i) {
- char c;
- do
- c = *s++;
- while (c && !isalnum(c));
- if (!c)
- break;
- h += tolower(c);
- h += (h << 10);
- h ^= (h >> 6);
- }
-
- h += (h << 3);
- h ^= (h >> 11);
- h += (h << 15);
-
- return h;
- }
-
-};
-
-typedef HashMap<const char*, const CharsetEntry*, EncodingNameHash> NameMap;
-typedef HashMap<TextEncodingID, const CharsetEntry*, IntHash<TextEncodingID>, TextEncodingIDHashTraits> EncodingMap;
-
-static NameMap* nameMap;
-static EncodingMap* encodingMap;
-
-static void buildCharsetMaps()
-{
- ASSERT(!nameMap);
- ASSERT(!encodingMap);
-
- nameMap = new NameMap;
- encodingMap = new EncodingMap;
-
- for (int i = 0; CharsetTable[i].name; ++i) {
- ASSERT(CharsetTable[i].encoding != TextEncodingIDHashTraits::emptyValue());
- ASSERT(CharsetTable[i].encoding != TextEncodingIDHashTraits::deletedValue());
-
- nameMap->add(CharsetTable[i].name, &CharsetTable[i]);
- encodingMap->add(CharsetTable[i].encoding, &CharsetTable[i]);
- }
-}
-
-TextEncodingID textEncodingIDFromCharsetName(const char* name, TextEncodingFlags* flags)
-{
- if (!nameMap)
- buildCharsetMaps();
-
- const CharsetEntry* entry = nameMap->get(name);
- if (!entry) {
- UErrorCode err = U_ZERO_ERROR;
- const char* standardName = ucnv_getStandardName(name, "IANA", &err);
- if (!standardName || !(entry = nameMap->get(standardName))) {
- if (flags)
- *flags = NoEncodingFlags;
- return InvalidEncoding;
- }
- }
-
- if (flags)
- *flags = static_cast<TextEncodingFlags>(entry->flags);
- return entry->encoding;
-}
-
-const char* charsetNameFromTextEncodingID(TextEncodingID encoding)
-{
- if (!encodingMap)
- buildCharsetMaps();
-
- const CharsetEntry* entry = encodingMap->get(encoding);
- if (!entry)
- return 0;
- return entry->name;
-}
-
-} // namespace WebCore
#include "config.h"
#include "DeprecatedString.h"
+#include "CString.h"
#include "Logging.h"
+#include "PlatformString.h"
#include "RegularExpression.h"
#include "TextEncoding.h"
#include <kjs/dtoa.h>
+// Decodes a NUL-terminated byte string as UTF-8 (length taken with strlen)
+// and returns the result as a DeprecatedString.
DeprecatedString DeprecatedString::fromUtf8(const char *chs)
{
- return TextEncoding(UTF8Encoding).toUnicode(chs, strlen(chs));
+ return UTF8Encoding().decode(chs, strlen(chs)).deprecatedString();
}
+// Decodes the first len bytes of chs as UTF-8 and returns the result as a
+// DeprecatedString.
DeprecatedString DeprecatedString::fromUtf8(const char *chs, int len)
{
- return TextEncoding(UTF8Encoding).toUnicode(chs, len);
+ return UTF8Encoding().decode(chs, len).deprecatedString();
}
+// Encodes this string with the UTF-8 encoding and returns the bytes as a
+// DeprecatedCString; the result's length() is also stored in |length|.
DeprecatedCString DeprecatedString::utf8(int& length) const
{
- DeprecatedCString result = TextEncoding(UTF8Encoding).fromUnicode(*this);
+ // Use a named cast to a const pointer rather than a C-style cast, which
+ // would silently strip const; encode() is passed const UChar* elsewhere.
+ DeprecatedCString result = UTF8Encoding().encode(reinterpret_cast<const ::UChar*>(unicode()), this->length()).deprecatedCString();
length = result.length();
return result;
}
if (u_getCombiningClass(m_run[currentCharacter + 1]) == HIRAGANA_KATAKANA_VOICING_MARKS) {
// Normalize into composed form using 3.2 rules.
UChar normalizedCharacters[2] = { 0, 0 };
- UErrorCode uStatus = (UErrorCode)0;
+ UErrorCode uStatus = U_ZERO_ERROR;
int32_t resultLength = unorm_normalize(m_run.data(currentCharacter), 2,
UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus);
if (resultLength == 1 && uStatus == 0)
#include "config.h"
#include "GraphicsContext.h"
-#include "DeprecatedString.h"
#include "Font.h"
using namespace std;
#include "IntRect.h"
#include "Pen.h"
#include "TextDirection.h"
-#include <unicode/umachine.h>
+#include "UChar.h"
#include <wtf/Noncopyable.h>
#include <wtf/Platform.h>
const int cMisspellingLinePatternGapWidth = 1;
class AffineTransform;
- class DeprecatedString;
class Font;
class GraphicsContextPrivate;
class GraphicsContextPlatformPrivate;
#include "config.h"
#include "KURL.h"
+#include "CString.h"
#include "PlatformString.h"
#include "RegularExpression.h"
+#include "TextEncoding.h"
#include <wtf/Vector.h>
#include <unicode/uidna.h>
#include <assert.h>
parse(url.ascii(), &url);
}
-KURL::KURL(const KURL &base, const DeprecatedString &relative, const TextEncoding& encoding)
+// Builds a URL from |relative| and |base| by calling init() with UTF-8 as the
+// text encoding.
+KURL::KURL(const KURL& base, const DeprecatedString& relative)
+{
+ init(base, relative, UTF8Encoding());
+}
+
+// Builds a URL from |relative| and |base| by calling init() with the
+// caller-supplied text encoding.
+KURL::KURL(const KURL& base, const DeprecatedString& relative, const TextEncoding& encoding)
+{
+ init(base, relative, encoding);
+}
+
+void KURL::init(const KURL &base, const DeprecatedString &relative, const TextEncoding& encoding)
{
// Allow at lest absolute URLs to resolve against an empty URL.
if (!base.m_isValid && !base.isEmpty()) {
return result;
}
-DeprecatedString KURL::decode_string(const DeprecatedString& urlString, const TextEncoding& encoding)
+// Convenience overload: forwards to the two-argument decode_string() with
+// UTF-8 as the text encoding.
+DeprecatedString KURL::decode_string(const DeprecatedString& urlString)
{
- static const TextEncoding utf8Encoding(UTF8Encoding);
+ return decode_string(urlString, UTF8Encoding());
+}
+DeprecatedString KURL::decode_string(const DeprecatedString& urlString, const TextEncoding& encoding)
+{
DeprecatedString result("");
Vector<char, 2048> buffer(0);
}
// Decode the bytes into Unicode characters.
- DeprecatedString decoded = (encoding.isValid() ? encoding : utf8Encoding).toUnicode(buffer, p - buffer);
- if (decoded.isEmpty()) {
+ String decoded = (encoding.isValid() ? encoding : UTF8Encoding()).decode(buffer, p - buffer);
+ if (decoded.isEmpty())
continue;
- }
// Build up the string with what we just skipped and what we just decoded.
result.append(urlString.mid(decodedPosition, encodedRunPosition - decodedPosition));
- result.append(decoded);
+ result.append(reinterpret_cast<const DeprecatedChar*>(decoded.characters()), decoded.length());
decodedPosition = encodedRunEnd;
}
char *strBuffer;
- TextEncoding pathEncoding(UTF8Encoding);
- TextEncoding otherEncoding = encoding.isValid() ? encoding : TextEncoding(UTF8Encoding);
+ TextEncoding pathEncoding(UTF8Encoding());
+ TextEncoding otherEncoding = encoding.isValid() ? encoding : UTF8Encoding();
int pathEnd = -1;
if (pathEncoding != otherEncoding) {
pathEnd = s.find(RegularExpression("[?#]"));
}
if (pathEnd == -1) {
- DeprecatedCString decoded = pathEncoding.fromUnicode(s);
+ CString decoded = pathEncoding.encode(reinterpret_cast<const UChar*>(s.unicode()), s.length());
int decodedLength = decoded.length();
strBuffer = static_cast<char *>(fastMalloc(decodedLength + 1));
memcpy(strBuffer, decoded, decodedLength);
strBuffer[decodedLength] = 0;
} else {
- DeprecatedCString pathDecoded = pathEncoding.fromUnicode(s.left(pathEnd));
- DeprecatedCString otherDecoded = otherEncoding.fromUnicode(s.mid(pathEnd));
+ int length = s.length();
+ CString pathDecoded = pathEncoding.encode(reinterpret_cast<const UChar*>(s.unicode()), pathEnd);
+ CString otherDecoded = otherEncoding.encode(reinterpret_cast<const UChar*>(s.unicode()) + pathEnd, length - pathEnd);
int pathDecodedLength = pathDecoded.length();
int otherDecodedLength = otherDecoded.length();
strBuffer = static_cast<char *>(fastMalloc(pathDecodedLength + otherDecodedLength + 1));
/*
- * Copyright (C) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#ifndef KURL_h
#define KURL_h
+#include "DeprecatedString.h"
#include <wtf/Platform.h>
-#include "TextEncoding.h"
#if PLATFORM(CF)
typedef const struct __CFURL * CFURLRef;
class KURL;
class String;
+ class TextEncoding;
bool operator==(const KURL&, const KURL&);
bool equalIgnoringRef(const KURL&, const KURL&);
public:
KURL();
KURL(const char*);
- KURL(const KURL&, const DeprecatedString&, const TextEncoding& encoding = TextEncoding(UTF8Encoding));
+ KURL(const KURL&, const DeprecatedString&);
+ KURL(const KURL&, const DeprecatedString&, const TextEncoding&);
KURL(const DeprecatedString&);
#if PLATFORM(MAC)
KURL(NSURL*);
bool isLocalFile() const;
- static DeprecatedString decode_string(const DeprecatedString &, const TextEncoding& encoding = TextEncoding(UTF8Encoding));
- static DeprecatedString encode_string(const DeprecatedString &);
+ static DeprecatedString decode_string(const DeprecatedString&);
+ static DeprecatedString decode_string(const DeprecatedString&, const TextEncoding&);
+ static DeprecatedString encode_string(const DeprecatedString&);
friend bool operator==(const KURL &, const KURL &);
private:
bool isHierarchical() const;
+ void init(const KURL&, const DeprecatedString&, const TextEncoding&);
void parse(const char *url, const DeprecatedString *originalString);
DeprecatedString urlString;
String(const char*, unsigned length);
String(StringImpl* i) : m_impl(i) { }
+ static String newUninitialized(size_t length, UChar*& characterBuffer);
+
operator KJS::Identifier() const;
operator KJS::UString() const;
}
}
-DeprecatedString SegmentedString::toString() const
+String SegmentedString::toString() const
{
- DeprecatedString result;
+ String result;
if (m_pushedChar1) {
result.append(m_pushedChar1);
if (m_pushedChar2)
#ifndef SegmentedString_h
#define SegmentedString_h
-#include "DeprecatedString.h"
#include "DeprecatedValueList.h"
#include "PlatformString.h"
#include <assert.h>
friend class SegmentedString;
SegmentedSubstring() : m_length(0), m_current(0) {}
- SegmentedSubstring(const DeprecatedString &str) : m_string(str), m_length(str.length()) {
- m_current = m_length == 0 ? 0 : reinterpret_cast<const UChar*>(m_string.stableUnicode());
+ // Keeps a copy of the String and points m_current at its characters, or
+ // at 0 when the string is empty.
+ SegmentedSubstring(const String& str) : m_string(str), m_length(str.length()) {
+ m_current = m_length == 0 ? 0 : m_string.characters();
}
SegmentedSubstring(const UChar* str, int length) : m_length(length), m_current(length == 0 ? 0 : str) {}
void clear() { m_length = 0; m_current = 0; }
- void appendTo(DeprecatedString& str) const {
- if (reinterpret_cast<const UChar*>(m_string.unicode()) == m_current) {
+ // Appends this substring to str. While m_current still equals
+ // m_string.characters(), the whole m_string is assigned (if str is empty)
+ // or appended; otherwise the m_length characters starting at m_current
+ // are appended.
+ void appendTo(String& str) const {
+ if (m_string.characters() == m_current) {
if (str.isEmpty())
str = m_string;
else
str.append(m_string);
} else {
- str.insert(str.length(), reinterpret_cast<const DeprecatedChar*>(m_current), m_length);
+ str.append(String(m_current, m_length));
}
}
- DeprecatedString m_string;
+ String m_string;
int m_length;
const UChar* m_current;
};
SegmentedString(const UChar* str, int length) : m_pushedChar1(0), m_pushedChar2(0)
, m_currentString(str, length), m_currentChar(m_currentString.m_current)
, m_lines(0), m_composite(false) {}
- SegmentedString(const DeprecatedString &str)
+ SegmentedString(const String& str)
: m_pushedChar1(0), m_pushedChar2(0), m_currentString(str)
, m_currentChar(m_currentString.m_current)
, m_lines(0), m_composite(false) {}
int lineCount() const { return m_lines; }
void resetLineCount() { m_lines = 0; }
- DeprecatedString toString() const;
+ String toString() const;
void operator++() { advance(); }
const UChar& operator*() const { return *current(); }
#include "config.h"
#include "StreamingTextDecoder.h"
-#if USE(ICU_UNICODE)
- #include "StreamingTextDecoderICU.h"
-#endif
-
-#if PLATFORM(MAC)
- #include "StreamingTextDecoderMac.h"
-#endif
-
-#include <wtf/Assertions.h>
-#include <wtf/OwnPtr.h>
+#include "PlatformString.h"
namespace WebCore {
-StreamingTextDecoder* StreamingTextDecoder::create(const TextEncoding& encoding)
-{
-#if USE(ICU_UNICODE)
- OwnPtr<StreamingTextDecoderICU> decoderICU(new StreamingTextDecoderICU(encoding));
- if (decoderICU->textEncodingSupported())
- return decoderICU.release();
-#endif
-
-#if PLATFORM(MAC)
- OwnPtr<StreamingTextDecoderMac> decoderMac(new StreamingTextDecoderMac(encoding));
- if (decoderMac->textEncodingSupported())
- return decoderMac.release();
-#endif
-
- LOG_ERROR("no converter can convert from text encoding 0x%X", encoding.encodingID());
+const UChar BOM = 0xFEFF;
-#if USE(ICU_UNICODE)
- return decoderICU.release();
-#elif PLATFORM(MAC)
- return decoderMac.release();
-#endif
+TextCodec::~TextCodec()
+{
}
-StreamingTextDecoder::~StreamingTextDecoder()
+// BOM characters can appear at the start of content as well as in the middle
+// of it, and none of them should survive into the decoded text. Every BOM is
+// dropped and the runs of characters between them are appended to the string.
+void TextCodec::appendOmittingBOM(String& s, const UChar* characters, size_t length)
{
+    size_t runStart = 0;
+    size_t position = 0;
+    while (position != length) {
+        if (characters[position] != BOM) {
+            ++position;
+            continue;
+        }
+        // Flush the run that ends just before this BOM, then step past it.
+        if (runStart != position)
+            s.append(String(&characters[runStart], position - runStart));
+        runStart = ++position;
+    }
+    // Flush whatever follows the last BOM (or everything, if there was none).
+    if (runStart != length)
+        s.append(String(&characters[runStart], length - runStart));
}
} // namespace WebCore
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef StreamingTextDecoder_H
-#define StreamingTextDecoder_H
+#ifndef StreamingTextDecoder_h
+#define StreamingTextDecoder_h
-#include "TextEncoding.h"
+#include "UChar.h"
+#include <memory>
#include <wtf/Noncopyable.h>
namespace WebCore {
- class StreamingTextDecoder : Noncopyable {
+ class CString;
+ class String;
+ class TextEncoding;
+
+ // Abstract base for codecs: decode() turns bytes into a String and
+ // encode() turns UChar text into a CString. Both are pure virtual.
+ class TextCodec : Noncopyable {
public:
- static StreamingTextDecoder* create(const TextEncoding&);
- virtual ~StreamingTextDecoder();
+ virtual ~TextCodec();
+
+ virtual String decode(const char*, size_t length, bool flush = false) = 0;
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false) = 0;
- virtual DeprecatedString toUnicode(const char* chs, int len, bool flush = false) = 0;
- virtual DeprecatedCString fromUnicode(const DeprecatedString&, bool allowEntities = false) = 0;
+ protected:
+ // Appends the given characters to the string, leaving out any BOM
+ // (0xFEFF) characters.
+ static void appendOmittingBOM(String&, const UChar*, size_t length);
};
-
+
+ typedef void (*EncodingNameRegistrar)(const char* alias, const char* name);
+
+ typedef std::auto_ptr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData);
+ typedef void (*TextCodecRegistrar)(const char* name, NewTextCodecFunction, const void* additionalData);
+
} // namespace WebCore
-#endif // StreamingTextDecoder_H
+#endif // StreamingTextDecoder_h
#include "config.h"
#include "StreamingTextDecoderICU.h"
-#include <unicode/unorm.h>
+#include "CString.h"
+#include "PlatformString.h"
+#include <unicode/ucnv.h>
#include <wtf/Assertions.h>
+using std::auto_ptr;
using std::min;
namespace WebCore {
-StreamingTextDecoderICU::StreamingTextDecoderICU(const TextEncoding& encoding)
- : m_encoding(encoding)
- , m_littleEndian(encoding.flags() & LittleEndian)
- , m_atStart(true)
- , m_numBufferedBytes(0)
- , m_converterICU(0)
-{
-}
-
-static const UChar BOM = 0xFEFF;
-static const size_t ConversionBufferSize = 16384;
+const size_t ConversionBufferSize = 16384;
static UConverter* cachedConverterICU;
-static TextEncodingID cachedConverterEncoding = InvalidEncoding;
-StreamingTextDecoderICU::~StreamingTextDecoderICU()
-{
- releaseICUConverter();
-}
+// FIXME: Registering all the encodings we get from ucnv_getAvailableName
+// includes encodings we don't want or need. For example: UTF16_PlatformEndian,
+// UTF16_OppositeEndian, UTF32_PlatformEndian, UTF32_OppositeEndian, and all
+// the encodings with commas and version numbers.
-void StreamingTextDecoderICU::releaseICUConverter()
+void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
{
- if (m_converterICU) {
- if (cachedConverterICU != 0)
- ucnv_close(cachedConverterICU);
- cachedConverterICU = m_converterICU;
- cachedConverterEncoding = m_encoding.encodingID();
- m_converterICU = 0;
+ // We register Hebrew with logical ordering using a separate name.
+ // Otherwise, this would share the same canonical name as the
+ // visual ordering case, and then TextEncoding could not tell them
+ // apart; ICU works with either name.
+ registrar("ISO-8859-8-I", "ISO-8859-8-I");
+
+ int32_t numEncodings = ucnv_countAvailable();
+ for (int32_t i = 0; i < numEncodings; ++i) {
+ const char* name = ucnv_getAvailableName(i);
+ UErrorCode error = U_ZERO_ERROR;
+ // FIXME: Should we use the "MIME" standard instead of "IANA"?
+ const char* standardName = ucnv_getStandardName(name, "IANA", &error);
+ if (!U_SUCCESS(error) || !standardName)
+ continue;
+
+ registrar(standardName, standardName);
+
+ uint16_t numAliases = ucnv_countAliases(name, &error);
+ ASSERT(U_SUCCESS(error));
+ if (U_SUCCESS(error))
+ for (uint16_t j = 0; j < numAliases; ++j) {
+ error = U_ZERO_ERROR;
+ const char* alias = ucnv_getAlias(name, j, &error);
+ ASSERT(U_SUCCESS(error));
+ if (U_SUCCESS(error) && alias != standardName)
+ registrar(alias, standardName);
+ }
}
+
+ // Additional aliases that historically were present in the encoding
+ // table in WebKit on Macintosh that don't seem to be present in ICU.
+ // Perhaps we can prove these are not used on the web and remove them.
+ // Or perhaps we can get them added to ICU.
+ registrar("cnbig5", "Big5");
+ registrar("cngb", "EUC-CN");
+ registrar("csISO88598I", "ISO_8859-8-I");
+ registrar("csgb231280", "EUC-CN");
+ registrar("dos720", "cp864");
+ registrar("dos874", "cp874");
+ registrar("jis7", "ISO-2022-JP");
+ registrar("koi", "KOI8-R");
+ registrar("logical", "ISO-8859-8-I");
+ registrar("unicode11utf8", "UTF-8");
+ registrar("unicode20utf8", "UTF-8");
+ registrar("visual", "ISO-8859-8");
+ registrar("winarabic", "windows-1256");
+ registrar("winbaltic", "windows-1257");
+ registrar("wincyrillic", "windows-1251");
+ registrar("windows874", "cp874");
+ registrar("wingreek", "windows-1253");
+ registrar("winhebrew", "windows-1255");
+ registrar("winlatin2", "windows-1250");
+ registrar("winturkish", "windows-1254");
+ registrar("winvietnamese", "windows-1258");
+ registrar("xcp1250", "windows-1250");
+ registrar("xcp1251", "windows-1251");
+ registrar("xeuc", "EUC-JP");
+ registrar("xeuccn", "EUC-CN");
+ registrar("xgbk", "EUC-CN");
+ registrar("xunicode20utf8", "UTF-8");
+ registrar("xxbig5", "Big5");
}
-bool StreamingTextDecoderICU::textEncodingSupported()
+static auto_ptr<TextCodec> newTextCodecICU(const TextEncoding& encoding, const void*)
{
- if (!m_converterICU)
- createICUConverter();
-
- return m_converterICU;
+ return auto_ptr<TextCodec>(new TextCodecICU(encoding));
}
-DeprecatedString StreamingTextDecoderICU::convertUTF16(const unsigned char* s, int length)
+void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)
{
- ASSERT(m_numBufferedBytes == 0 || m_numBufferedBytes == 1);
-
- const unsigned char* p = s;
- size_t len = length;
-
- DeprecatedString result("");
-
- result.reserve(length / 2);
-
- if (m_numBufferedBytes != 0 && len != 0) {
- ASSERT(m_numBufferedBytes == 1);
- UChar c;
- if (m_littleEndian)
- c = m_bufferedBytes[0] | (p[0] << 8);
- else
- c = (m_bufferedBytes[0] << 8) | p[0];
-
- if (c)
- result.append(reinterpret_cast<DeprecatedChar*>(&c), 1);
-
- m_numBufferedBytes = 0;
- p += 1;
- len -= 1;
- }
-
- while (len > 1) {
- UChar buffer[ConversionBufferSize];
- int runLength = min(len / 2, ConversionBufferSize);
- int bufferLength = 0;
- if (m_littleEndian) {
- for (int i = 0; i < runLength; ++i) {
- UChar c = p[0] | (p[1] << 8);
- p += 2;
- if (c != BOM)
- buffer[bufferLength++] = c;
- }
- } else {
- for (int i = 0; i < runLength; ++i) {
- UChar c = (p[0] << 8) | p[1];
- p += 2;
- if (c != BOM)
- buffer[bufferLength++] = c;
- }
- }
- result.append(reinterpret_cast<DeprecatedChar*>(buffer), bufferLength);
- len -= runLength * 2;
- }
-
- if (len) {
- ASSERT(m_numBufferedBytes == 0);
- m_numBufferedBytes = 1;
- m_bufferedBytes[0] = p[0];
+ // See comment above in registerEncodingNames.
+ registrar("ISO-8859-8-I", newTextCodecICU, 0);
+
+ int32_t numEncodings = ucnv_countAvailable();
+ for (int32_t i = 0; i < numEncodings; ++i) {
+ const char* name = ucnv_getAvailableName(i);
+ UErrorCode error = U_ZERO_ERROR;
+ // FIXME: Should we use the "MIME" standard instead of "IANA"?
+ const char* standardName = ucnv_getStandardName(name, "IANA", &error);
+ if (!U_SUCCESS(error) || !standardName)
+ continue;
+ registrar(standardName, newTextCodecICU, 0);
}
-
- return result;
}
-bool StreamingTextDecoderICU::convertIfASCII(const unsigned char* s, int length, DeprecatedString& str)
+TextCodecICU::TextCodecICU(const TextEncoding& encoding)
+ : m_encoding(encoding)
+ , m_numBufferedBytes(0)
+ , m_converterICU(0)
{
- ASSERT(m_numBufferedBytes == 0 || m_numBufferedBytes == 1);
-
- DeprecatedString result("");
- result.reserve(length);
-
- const unsigned char* p = s;
- size_t len = length;
- unsigned char ored = 0;
- while (len) {
- UChar buffer[ConversionBufferSize];
- int runLength = min(len, ConversionBufferSize);
- int bufferLength = 0;
- for (int i = 0; i < runLength; ++i) {
- unsigned char c = *p++;
- ored |= c;
- buffer[bufferLength++] = c;
- }
- if (ored & 0x80)
- return false;
- result.append(reinterpret_cast<DeprecatedChar*>(buffer), bufferLength);
- len -= runLength;
- }
+}
- str = result;
- return true;
+TextCodecICU::~TextCodecICU()
+{
+ releaseICUConverter();
}
-void StreamingTextDecoderICU::createICUConverter()
+void TextCodecICU::releaseICUConverter() const
{
- TextEncoding encoding = m_encoding.effectiveEncoding();
- const char* encodingName = encoding.name();
-
- bool cachedEncodingEqual = cachedConverterEncoding == encoding.encodingID();
- cachedConverterEncoding = InvalidEncoding;
-
- if (cachedEncodingEqual && cachedConverterICU) {
- m_converterICU = cachedConverterICU;
- cachedConverterICU = 0;
- } else {
- UErrorCode err = U_ZERO_ERROR;
- ASSERT(!m_converterICU);
- m_converterICU = ucnv_open(encodingName, &err);
-#if !LOG_DISABLED
- if (err == U_AMBIGUOUS_ALIAS_WARNING)
- LOG_ERROR("ICU ambiguous alias warning for encoding: %s", encodingName);
-#endif
+ if (m_converterICU) {
+ if (cachedConverterICU)
+ ucnv_close(cachedConverterICU);
+ cachedConverterICU = m_converterICU;
+ m_converterICU = 0;
}
}
-// We strip BOM characters because they can show up both at the start of content
-// and inside content, and we never want them to end up in the decoded text.
-void StreamingTextDecoderICU::appendOmittingBOM(DeprecatedString& s, const UChar* characters, int byteCount)
+void TextCodecICU::createICUConverter() const
{
- ASSERT(byteCount % sizeof(UChar) == 0);
- int start = 0;
- int characterCount = byteCount / sizeof(UChar);
- for (int i = 0; i != characterCount; ++i) {
- if (BOM == characters[i]) {
- if (start != i)
- s.append(reinterpret_cast<const DeprecatedChar*>(&characters[start]), i - start);
- start = i + 1;
+ ASSERT(!m_converterICU);
+
+ UErrorCode err;
+
+ if (cachedConverterICU) {
+ err = U_ZERO_ERROR;
+ const char* cachedName = ucnv_getName(cachedConverterICU, &err);
+ if (U_SUCCESS(err) && m_encoding == cachedName) {
+ m_converterICU = cachedConverterICU;
+ cachedConverterICU = 0;
+ return;
}
}
- if (start != characterCount)
- s.append(reinterpret_cast<const DeprecatedChar*>(&characters[start]), characterCount - start);
+
+ err = U_ZERO_ERROR;
+ m_converterICU = ucnv_open(m_encoding.name(), &err);
+#if !LOG_DISABLED
+ if (err == U_AMBIGUOUS_ALIAS_WARNING)
+ LOG_ERROR("ICU ambiguous alias warning for encoding: %s", m_encoding.name());
+#endif
}
-DeprecatedString StreamingTextDecoderICU::convertUsingICU(const unsigned char* chs, int len, bool flush)
+String TextCodecICU::decode(const char* bytes, size_t length, bool flush)
{
// Get a converter for the passed-in encoding.
if (!m_converterICU) {
createICUConverter();
- if (!m_converterICU)
- return DeprecatedString();
+ ASSERT(m_converterICU);
+ if (!m_converterICU) {
+ LOG_ERROR("error creating ICU encoder even though encoding was in table");
+ return String();
+ }
}
- DeprecatedString result("");
- result.reserve(len);
+ String result = "";
UChar buffer[ConversionBufferSize];
- const char* source = reinterpret_cast<const char*>(chs);
- const char* sourceLimit = source + len;
+ const char* source = reinterpret_cast<const char*>(bytes);
+ const char* sourceLimit = source + length;
int32_t* offsets = NULL;
UErrorCode err;
err = U_ZERO_ERROR;
ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
int count = target - buffer;
- appendOmittingBOM(result, reinterpret_cast<const UChar*>(buffer), count * sizeof(UChar));
+ appendOmittingBOM(result, reinterpret_cast<const UChar*>(buffer), count);
} while (err == U_BUFFER_OVERFLOW_ERROR);
if (U_FAILURE(err)) {
ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, true, &err);
} while (source < sourceLimit);
LOG_ERROR("ICU conversion error");
- return DeprecatedString();
- }
-
- return result;
-}
-
-DeprecatedString StreamingTextDecoderICU::convert(const unsigned char* chs, int len, bool flush)
-{
- switch (m_encoding.encodingID()) {
- case UTF16Encoding:
- return convertUTF16(chs, len);
-
- case ASCIIEncoding:
- case Latin1Encoding:
- case WinLatin1Encoding: {
- DeprecatedString result;
- if (convertIfASCII(chs, len, result))
- return result;
- break;
- }
-
- case UTF8Encoding:
- // If a previous run used ICU, we might have a partly converted character.
- // If so, don't use the optimized ASCII code path.
- if (!m_converterICU) {
- DeprecatedString result;
- if (convertIfASCII(chs, len, result))
- return result;
- }
- break;
-
- default:
- break;
+ return String();
}
- //#define PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE 1000
-#ifdef PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE
- DeprecatedString result;
- int chunkSize;
- for (int i = 0; i != len; i += chunkSize) {
- chunkSize = len - i;
- if (chunkSize > PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE) {
- chunkSize = PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE;
- }
- result += convertUsingICU(chs + i, chunkSize, flush && (i + chunkSize == len));
- }
return result;
-#else
- return convertUsingICU(chs, len, flush);
-#endif
}
-DeprecatedString StreamingTextDecoderICU::toUnicode(const char* chs, int len, bool flush)
+CString TextCodecICU::encode(const UChar* characters, size_t length, bool allowEntities)
{
- ASSERT_ARG(len, len >= 0);
-
- if (!chs)
- return DeprecatedString();
-
- if (len <= 0 && !flush)
+ if (!length)
return "";
- // Handle normal case.
- if (!m_atStart)
- return convert(chs, len, flush);
-
- // Check to see if we found a BOM.
- int numBufferedBytes = m_numBufferedBytes;
- int buf1Len = numBufferedBytes;
- int buf2Len = len;
- const unsigned char* buf1 = m_bufferedBytes;
- const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(chs);
- unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
- unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
- unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
- int BOMLength = 0;
- if (c1 == 0xFF && c2 == 0xFE) {
- if (m_encoding != TextEncoding(UTF16Encoding, LittleEndian)) {
- releaseICUConverter();
- m_encoding = TextEncoding(UTF16Encoding, LittleEndian);
- m_littleEndian = true;
- }
- BOMLength = 2;
- } else if (c1 == 0xFE && c2 == 0xFF) {
- if (m_encoding != TextEncoding(UTF16Encoding, BigEndian)) {
- releaseICUConverter();
- m_encoding = TextEncoding(UTF16Encoding, BigEndian);
- m_littleEndian = false;
- }
- BOMLength = 2;
- } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
- if (m_encoding != TextEncoding(UTF8Encoding)) {
- releaseICUConverter();
- m_encoding = TextEncoding(UTF8Encoding);
- }
- BOMLength = 3;
- }
-
- // Handle case where we found a BOM.
- if (BOMLength != 0) {
- ASSERT(numBufferedBytes + len >= BOMLength);
- int skip = BOMLength - numBufferedBytes;
- m_numBufferedBytes = 0;
- m_atStart = false;
- return len == skip ? DeprecatedString("") : convert(chs + skip, len - skip, flush);
- }
-
- // Handle case where we know there is no BOM coming.
- const int bufferSize = sizeof(m_bufferedBytes);
- if (numBufferedBytes + len > bufferSize || flush) {
- m_atStart = false;
- if (numBufferedBytes == 0) {
- return convert(chs, len, flush);
- }
- unsigned char bufferedBytes[sizeof(m_bufferedBytes)];
- memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
- m_numBufferedBytes = 0;
- return convert(bufferedBytes, numBufferedBytes, false) + convert(chs, len, flush);
- }
-
- // Continue to look for the BOM.
- memcpy(&m_bufferedBytes[numBufferedBytes], chs, len);
- m_numBufferedBytes += len;
- return "";
-}
-
-DeprecatedCString StreamingTextDecoderICU::fromUnicode(const DeprecatedString &qcs, bool allowEntities)
-{
- TextEncodingID encoding = m_encoding.effectiveEncoding().encodingID();
-
- if (encoding == WinLatin1Encoding && qcs.isAllLatin1())
- return qcs.latin1();
-
- if ((encoding == WinLatin1Encoding || encoding == UTF8Encoding || encoding == ASCIIEncoding)
- && qcs.isAllASCII())
- return qcs.ascii();
+ if (!m_converterICU)
+ createICUConverter();
+ if (!m_converterICU)
+ return CString();
// FIXME: We should see if there is "force ASCII range" mode in ICU;
// until then, we change the backslash into a yen sign.
// Encoding will change the yen sign back into a backslash.
- DeprecatedString copy = qcs;
+ String copy(characters, length);
copy.replace('\\', m_encoding.backslashAsCurrencySymbol());
- if (!m_converterICU)
- createICUConverter();
- if (!m_converterICU)
- return DeprecatedCString();
-
- // FIXME: when DeprecatedString buffer is latin1, it would be nice to
- // convert from that w/o having to allocate a unicode buffer
-
- char buffer[ConversionBufferSize];
- const UChar* source = reinterpret_cast<const UChar*>(copy.unicode());
+ const UChar* source = copy.characters();
const UChar* sourceLimit = source + copy.length();
UErrorCode err = U_ZERO_ERROR;
- DeprecatedString normalizedString;
- if (UNORM_YES != unorm_quickCheck(source, copy.length(), UNORM_NFC, &err)) {
- normalizedString.truncate(copy.length()); // normalization to NFC rarely increases the length, so this first attempt will usually succeed
-
- int32_t normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), copy.length(), &err);
- if (err == U_BUFFER_OVERFLOW_ERROR) {
- err = U_ZERO_ERROR;
- normalizedString.truncate(normalizedLength);
- normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), normalizedLength, &err);
- }
-
- source = reinterpret_cast<const UChar*>(normalizedString.unicode());
- sourceLimit = source + normalizedLength;
- }
-
- DeprecatedCString result(1); // for trailing zero
if (allowEntities)
ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
ASSERT(U_SUCCESS(err));
if (U_FAILURE(err))
- return DeprecatedCString();
+ return CString();
+ Vector<char> result;
+ size_t size = 0;
do {
+ char buffer[ConversionBufferSize];
char* target = buffer;
char* targetLimit = target + ConversionBufferSize;
err = U_ZERO_ERROR;
- ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err);
- int count = target - buffer;
- buffer[count] = 0;
- result.append(buffer);
+ ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err);
+ size_t count = target - buffer;
+ result.resize(size + count);
+ memcpy(result.data() + size, buffer, count);
+ size += count;
} while (err == U_BUFFER_OVERFLOW_ERROR);
- return result;
+ return CString(result.data(), size);
}
#define StreamingTextDecoderICU_H
#include "StreamingTextDecoder.h"
-#include <unicode/ucnv.h>
-#include <unicode/utypes.h>
+#include "TextEncoding.h"
+
+typedef struct UConverter UConverter;
namespace WebCore {
- class StreamingTextDecoderICU : public StreamingTextDecoder {
+ class TextCodecICU : public TextCodec {
public:
- StreamingTextDecoderICU(const TextEncoding&);
- virtual ~StreamingTextDecoderICU();
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
- bool textEncodingSupported();
+ TextCodecICU(const TextEncoding&);
+ virtual ~TextCodecICU();
- virtual DeprecatedString toUnicode(const char* chs, int len, bool flush = false);
- virtual DeprecatedCString fromUnicode(const DeprecatedString&, bool allowEntities = false);
+ virtual String decode(const char*, size_t length, bool flush = false);
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
private:
- DeprecatedString convert(const char* chs, int len, bool flush)
- { return convert(reinterpret_cast<const unsigned char*>(chs), len, flush); }
- DeprecatedString convert(const unsigned char* chs, int len, bool flush);
-
- bool convertIfASCII(const unsigned char*, int len, DeprecatedString&);
- DeprecatedString convertUTF16(const unsigned char*, int len);
- DeprecatedString convertUsingICU(const unsigned char*, int len, bool flush);
-
- void createICUConverter();
- void releaseICUConverter();
-
- static void appendOmittingBOM(DeprecatedString&, const UChar* characters, int byteCount);
+ void createICUConverter() const;
+ void releaseICUConverter() const;
TextEncoding m_encoding;
- bool m_littleEndian;
- bool m_atStart;
-
unsigned m_numBufferedBytes;
- unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
-
- UConverter* m_converterICU;
+ unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+ mutable UConverter* m_converterICU;
};
-
+
} // namespace WebCore
#endif // StreamingTextDecoderICU_H
CString String::latin1() const
{
- return TextEncoding(Latin1Encoding).fromUnicode(deprecatedString());
+ return Latin1Encoding().encode(characters(), length());
}
CString String::utf8() const
{
- return TextEncoding(UTF8Encoding).fromUnicode(deprecatedString());
+ return UTF8Encoding().encode(characters(), length());
}
bool operator==(const String& a, const DeprecatedString& b)
return UString(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length());
}
+// Forwards to StringImpl::newUninitialized and wraps the resulting
+// implementation object in a String. characterBuffer is filled in by
+// StringImpl::newUninitialized.
+String String::newUninitialized(size_t length, UChar*& characterBuffer)
+{
+    StringImpl* impl = StringImpl::newUninitialized(length, characterBuffer);
+    return impl;
+}
+
} // namespace WebCore
#ifndef NDEBUG
deleteUCharVector(m_data);
}
-UChar* StringImpl::charactersWithNullTermination()
+const UChar* StringImpl::charactersWithNullTermination()
{
if (m_hasTerminatingNullCharacter)
return m_data;
init(reinterpret_cast<const UChar*>(str.data()), str.size());
}
+// Creates a StringImpl of the requested length without filling in its
+// characters. A character vector is only allocated for a non-zero length.
+// On return, characterBuffer holds the new object's m_data pointer so the
+// caller can write the characters in place.
+StringImpl* StringImpl::newUninitialized(size_t length, UChar*& characterBuffer)
+{
+    StringImpl* impl = new StringImpl;
+    impl->m_length = length;
+    if (length != 0)
+        impl->m_data = newUCharVector(length);
+
+    // Hand back whatever m_data is now: the fresh vector, or the value the
+    // StringImpl constructor left there when length is zero.
+    characterBuffer = impl->m_data;
+    return impl;
+}
+
} // namespace WebCore
#define StringImpl_h
#include "Shared.h"
+#include "UChar.h"
#include <kjs/identifier.h>
#include <wtf/Forward.h>
#include <wtf/Noncopyable.h>
#include <wtf/Vector.h>
-#include <unicode/umachine.h>
#include <limits.h>
#if PLATFORM(CF)
StringImpl(const KJS::UString&);
~StringImpl();
+ static StringImpl* newUninitialized(size_t length, UChar*& characterBuffer);
+
const UChar* characters() const { return m_data; }
unsigned length() const { return m_length; }
- UChar* charactersWithNullTermination();
+ const UChar* charactersWithNullTermination();
unsigned hash() const { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; }
static unsigned computeHash(const UChar*, unsigned len);
#ifndef TextBoundaries_h
#define TextBoundaries_h
-#include <unicode/umachine.h>
-
-// FIXME: Change clients to use ICU and remove these functions.
+#include "UChar.h"
namespace WebCore {
- void findWordBoundary(const UChar*, int len, int position, int *start, int *end);
+ void findWordBoundary(const UChar*, int len, int position, int* start, int* end);
int findNextWordFromIndex(const UChar*, int len, int position, bool forward);
- void findSentenceBoundary(const UChar*, int len, int position, int *start, int *end);
+ void findSentenceBoundary(const UChar*, int len, int position, int* start, int* end);
int findNextSentenceFromIndex(const UChar*, int len, int position, bool forward);
}
--- /dev/null
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecLatin1.h"
+
+#include "CString.h"
+#include "PlatformString.h"
+#include <unicode/utf16.h>
+
+using std::auto_ptr;
+
+namespace WebCore {
+
+// Byte-to-UTF-16 mapping used by this codec, indexed by the byte value.
+// Entries 00-7F and A0-FF map each byte to the code unit with the same value.
+// Entries 80-9F hold the Windows Latin-1 specific characters (for example
+// 0x80 -> U+20AC), except 81, 8D, 8F, 90 and 9D, which map to themselves.
+// decode() reads the table forwards; encodeComplexWindowsLatin1() searches
+// the 80-9F entries to map a character back to a byte.
+static const UChar table[256] = {
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07
+ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, // 10-17
+ 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, // 18-1F
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, // 20-27
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, // 28-2F
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 30-37
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, // 38-3F
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // 40-47
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, // 48-4F
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // 50-57
+ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, // 58-5F
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, // 60-67
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, // 68-6F
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, // 70-77
+ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, // 78-7F
+ 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
+ 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
+ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, // A0-A7
+ 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, // A8-AF
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, // B0-B7
+ 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, // B8-BF
+ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, // C0-C7
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, // C8-CF
+ 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, // D0-D7
+ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, // D8-DF
+ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, // E0-E7
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, // E8-EF
+ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, // F0-F7
+ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF // F8-FF
+};
+
+// Reports every encoding name this codec answers to. Each row below is an
+// { alias, canonical name } pair and is passed to the registrar in order:
+// the three canonical names first, then the aliases for windows-1252,
+// ISO-8859-1 and US-ASCII.
+void TextCodecLatin1::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    static const char* const names[][2] = {
+        { "windows-1252", "windows-1252" },
+        { "ISO-8859-1", "ISO-8859-1" },
+        { "US-ASCII", "US-ASCII" },
+
+        { "WinLatin1", "windows-1252" },
+        { "ibm-1252", "windows-1252" },
+        { "ibm-1252_P100-2000", "windows-1252" },
+
+        { "8859-1", "ISO-8859-1" },
+        { "CP819", "ISO-8859-1" },
+        { "IBM819", "ISO-8859-1" },
+        { "csISOLatin1", "ISO-8859-1" },
+        { "iso-ir-100", "ISO-8859-1" },
+        { "iso_8859-1:1987", "ISO-8859-1" },
+        { "l1", "ISO-8859-1" },
+        { "latin1", "ISO-8859-1" },
+
+        { "ANSI_X3.4-1968", "US-ASCII" },
+        { "ANSI_X3.4-1986", "US-ASCII" },
+        { "ASCII", "US-ASCII" },
+        { "IBM367", "US-ASCII" },
+        { "ISO646-US", "US-ASCII" },
+        { "ISO_646.irv:1991", "US-ASCII" },
+        { "cp367", "US-ASCII" },
+        { "csASCII", "US-ASCII" },
+        { "ibm-367_P100-1995", "US-ASCII" },
+        { "iso-ir-6", "US-ASCII" },
+        { "iso-ir-6-us", "US-ASCII" },
+        { "us", "US-ASCII" },
+        { "x-ansi", "US-ASCII" }
+    };
+
+    const size_t nameCount = sizeof(names) / sizeof(names[0]);
+    for (size_t i = 0; i < nameCount; ++i)
+        registrar(names[i][0], names[i][1]);
+}
+
+// Factory handed to the codec registrar: creates a new TextCodecLatin1.
+// Neither the encoding nor the additional-data argument is used.
+static auto_ptr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*)
+{
+    auto_ptr<TextCodec> codec(new TextCodecLatin1);
+    return codec;
+}
+
+// Registers the codec factory for each canonical name this codec serves.
+void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar)
+{
+    // ASCII and Latin-1 both decode as Windows Latin-1 although they retain
+    // unique identities, so all three names share the same factory.
+    static const char* const codecNames[] = { "windows-1252", "ISO-8859-1", "US-ASCII" };
+
+    const size_t codecCount = sizeof(codecNames) / sizeof(codecNames[0]);
+    for (size_t i = 0; i < codecCount; ++i)
+        registrar(codecNames[i], newStreamingTextDecoderWindowsLatin1, 0);
+}
+
+// Decodes `length` bytes into a String with one UTF-16 code unit per byte.
+// The third (flush) argument is ignored.
+String TextCodecLatin1::decode(const char* bytes, size_t length, bool)
+{
+    UChar* destination;
+    String result = String::newUninitialized(length, destination);
+    const unsigned char* source = reinterpret_cast<const unsigned char*>(bytes);
+
+    // First pass: widen every byte unchanged while OR-ing all bytes together,
+    // so a single test afterwards tells us whether any byte had its high bit set.
+    unsigned char allBits = 0;
+    for (size_t i = 0; i != length; ++i) {
+        allBits |= source[i];
+        destination[i] = source[i];
+    }
+
+    // Second pass, only when a non-ASCII byte was seen: redo the conversion
+    // through the mapping table.
+    if (allBits & 0x80) {
+        for (size_t i = 0; i != length; ++i)
+            destination[i] = table[source[i]];
+    }
+
+    return result;
+}
+
+// Searches the 80-9F rows of the mapping table for the given character.
+// On success stores the matching byte in `byte` and returns true; otherwise
+// returns false (the value left in `byte` is then not meaningful).
+static bool findWindowsLatin1ExtensionByte(UChar32 character, unsigned char& byte)
+{
+    for (unsigned char candidate = 0x80; candidate < 0xA0; ++candidate) {
+        if (table[candidate] == character) {
+            byte = candidate;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Slow-path encoder used when the input is not pure ASCII. Code points in
+// 00-7F and A0-FF are emitted as the byte with the same value; others are
+// looked up in the 80-9F rows of the table. A code point with no byte
+// becomes a decimal character reference ("&#N;") when allowEntities is set
+// and '?' otherwise.
+static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, bool allowEntities)
+{
+    Vector<char> output(length);
+    char* out = output.data();
+    size_t written = 0;
+
+    size_t index = 0;
+    while (index < length) {
+        // U16_NEXT reads one code point and advances index past it.
+        UChar32 codePoint;
+        U16_NEXT(characters, index, length, codePoint);
+
+        unsigned char byte = codePoint;
+        // Directly encodable means the value fits in a byte and is outside 80-9F.
+        const bool directlyEncodable = byte == codePoint && (codePoint & 0xE0) != 0x80;
+        if (!directlyEncodable && !findWindowsLatin1ExtensionByte(codePoint, byte)) {
+            if (allowEntities) {
+                char entity[16];
+                sprintf(entity, "&#%u;", codePoint);
+                size_t entityLength = strlen(entity);
+                // Grow to fit what is already written, this entity, and at
+                // least one byte per remaining input code unit.
+                output.resize(written + entityLength + length - index);
+                out = output.data();
+                memcpy(out + written, entity, entityLength);
+                written += entityLength;
+                continue;
+            }
+            byte = '?';
+        }
+
+        out[written++] = byte;
+    }
+
+    return CString(out, written);
+}
+
+// Encodes `length` UTF-16 code units. All-ASCII input is returned straight
+// from a one-pass narrowing copy; anything else is handed to
+// encodeComplexWindowsLatin1 together with allowEntities.
+CString TextCodecLatin1::encode(const UChar* characters, size_t length, bool allowEntities)
+{
+    {
+        char* destination;
+        CString result = CString::newUninitialized(length, destination);
+
+        // Narrow every code unit to a byte while OR-ing them all together;
+        // any bit outside the low seven means the input was not pure ASCII.
+        UChar allBits = 0;
+        for (size_t i = 0; i != length; ++i) {
+            const UChar unit = characters[i];
+            allBits |= unit;
+            destination[i] = unit;
+        }
+
+        const bool isAllASCII = !(allBits & 0xFF80);
+        if (isAllASCII)
+            return result;
+    }
+
+    // The speculative result above has gone out of scope; redo the work on
+    // the path that handles the more complex cases.
+    return encodeComplexWindowsLatin1(characters, length, allowEntities);
+}
+
+} // namespace WebCore
/*
- * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef ExtraCFEncodings_H
-#define ExtraCFEncodings_H
+#ifndef TextCodecLatin1_h
+#define TextCodecLatin1_h
-// Until there's a CFString constant for these encodings, this works.
-// Since they are macros, they won't cause a compile failure even if the CFString constant is added.
-#define kCFStringEncodingBig5_DOSVariant (kTextEncodingBig5 | (kBig5_DOSVariant << 16))
-#define kCFStringEncodingEUC_CN_DOSVariant (kTextEncodingEUC_CN | (kEUC_CN_DOSVariant << 16))
-#define kCFStringEncodingEUC_KR_DOSVariant (kTextEncodingEUC_KR | (kEUC_KR_DOSVariant << 16))
-#define kCFStringEncodingISOLatin10 kTextEncodingISOLatin10
-#define kCFStringEncodingKOI8_U kTextEncodingKOI8_U
-#define kCFStringEncodingShiftJIS_DOSVariant (kTextEncodingShiftJIS | (kShiftJIS_DOSVariant << 16))
+#include "StreamingTextDecoder.h"
-#endif // ExtraCFEncodings_H
+namespace WebCore {
+
+ // TextCodec subclass declared for the Latin-1 family; its implementation
+ // file registers it under windows-1252, ISO-8859-1 and US-ASCII.
+ class TextCodecLatin1 : public TextCodec {
+ public:
+ // Static hooks that report this codec's encoding names and codec
+ // factories to the supplied registrar callbacks.
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
+
+ // Overrides of the TextCodec conversion interface.
+ virtual String decode(const char*, size_t length, bool flush = false);
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecLatin1_h
--- /dev/null
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecUTF16.h"
+
+#include "CString.h"
+#include "PlatformString.h"
+
+using std::auto_ptr;
+
+namespace WebCore {
+
+const UChar BOM = 0xFEFF;
+
+// Hands every UTF-16 related name to the registrar. Each entry is a pair: the
+// name being registered, followed by the "UTF-16LE" or "UTF-16BE" name it maps
+// to (presumably alias -> canonical name). Entries are registered in table order.
+void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    static const char* const nameTable[][2] = {
+        { "UTF-16LE", "UTF-16LE" },
+        { "UTF-16BE", "UTF-16BE" },
+
+        { "ISO-10646-UCS-2", "UTF-16LE" },
+        { "UCS-2", "UTF-16LE" },
+        { "UTF-16", "UTF-16LE" },
+        { "Unicode", "UTF-16LE" },
+        { "csUnicode", "UTF-16LE" },
+        { "unicodeFEFF", "UTF-16LE" },
+
+        { "unicodeFFFE", "UTF-16BE" },
+    };
+
+    const size_t entryCount = sizeof(nameTable) / sizeof(nameTable[0]);
+    for (size_t entry = 0; entry < entryCount; ++entry)
+        registrar(nameTable[entry][0], nameTable[entry][1]);
+}
+
+// Factory registered for "UTF-16LE": creates a TextCodecUTF16 with
+// littleEndian = true. Neither parameter is used.
+static auto_ptr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*)
+{
+    auto_ptr<TextCodec> littleEndianCodec(new TextCodecUTF16(true));
+    return littleEndianCodec;
+}
+
+// Factory registered for "UTF-16BE": creates a TextCodecUTF16 with
+// littleEndian = false. Neither parameter is used.
+static auto_ptr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*)
+{
+    auto_ptr<TextCodec> bigEndianCodec(new TextCodecUTF16(false));
+    return bigEndianCodec;
+}
+
+// Registers one codec factory per byte order: "UTF-16LE" gets the little-endian
+// factory and "UTF-16BE" the big-endian one. The third registrar argument is 0
+// in both calls (the factories ignore their const void* parameter).
+void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
+{
+ registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0);
+ registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0);
+}
+
+// Converts a chunk of UTF-16 bytes (byte order chosen by m_littleEndian) into a
+// String. A trailing odd byte is remembered in m_bufferedByte and combined with
+// the first byte of the next call. Every code unit equal to BOM (0xFEFF) is
+// dropped from the output. The third (flush) parameter is ignored.
+// An empty input returns a null String without touching any buffered byte.
+String TextCodecUTF16::decode(const char* bytes, size_t length, bool)
+{
+    if (!length)
+        return String();
+
+    const unsigned char* in = reinterpret_cast<const unsigned char*>(bytes);
+    const size_t totalBytes = length + m_haveBufferedByte;
+    size_t pendingUnits = totalBytes / 2;
+
+    // Allocate for the worst case (no BOM removed); trimmed at the end.
+    UChar* storage;
+    String decoded = String::newUninitialized(pendingUnits, storage);
+    UChar* out = storage;
+
+    if (m_haveBufferedByte) {
+        // Finish the code unit whose first byte was carried over from the previous call.
+        const UChar carried = m_littleEndian
+            ? (m_bufferedByte | (in[0] << 8))
+            : ((m_bufferedByte << 8) | in[0]);
+        if (carried != BOM)
+            *out++ = carried;
+        m_haveBufferedByte = false;
+        ++in;
+        --pendingUnits;
+    }
+
+    // Whole byte pairs remaining in this chunk.
+    const unsigned char* const pairsEnd = in + 2 * pendingUnits;
+    if (m_littleEndian) {
+        for (; in != pairsEnd; in += 2) {
+            const UChar unit = in[0] | (in[1] << 8);
+            if (unit != BOM)
+                *out++ = unit;
+        }
+    } else {
+        for (; in != pairsEnd; in += 2) {
+            const UChar unit = (in[0] << 8) | in[1];
+            if (unit != BOM)
+                *out++ = unit;
+        }
+    }
+
+    // An odd total leaves exactly one unconsumed byte: keep it for the next call.
+    if (totalBytes % 2) {
+        ASSERT(!m_haveBufferedByte);
+        m_haveBufferedByte = true;
+        m_bufferedByte = in[0];
+    }
+
+    decoded.truncate(out - storage);
+    return decoded;
+}
+
+// Serializes `length` UTF-16 code units into 2 * length bytes, low byte first
+// when m_littleEndian is set and high byte first otherwise. No BOM is written.
+// The third (allowEntities) parameter is ignored.
+CString TextCodecUTF16::encode(const UChar* characters, size_t length, bool)
+{
+    char* out;
+    CString encoded = CString::newUninitialized(length * 2, out);
+
+    // FIXME: CString is not a reasonable data structure for encoded UTF-16, which will have
+    // null characters inside it. Perhaps the result of encode should not be a CString?
+    const UChar* const end = characters + length;
+    if (m_littleEndian) {
+        for (const UChar* in = characters; in != end; ++in) {
+            *out++ = *in;
+            *out++ = *in >> 8;
+        }
+    } else {
+        for (const UChar* in = characters; in != end; ++in) {
+            *out++ = *in >> 8;
+            *out++ = *in;
+        }
+    }
+
+    return encoded;
+}
+
+} // namespace WebCore
--- /dev/null
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecUTF16_h
+#define TextCodecUTF16_h
+
+#include "StreamingTextDecoder.h"
+
+namespace WebCore {
+
+ // Streaming codec for UTF-16 in either byte order. Because decode() may be
+ // handed an odd number of bytes, the codec carries state between calls.
+ class TextCodecUTF16 : public TextCodec {
+ public:
+ // Static registration hooks; each receives the registrar callback to invoke.
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
+
+ // littleEndian selects the byte order. m_bufferedByte is intentionally left
+ // unset here; it is only read while m_haveBufferedByte is true.
+ TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { }
+
+ virtual String decode(const char*, size_t length, bool flush = false);
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+
+ private:
+ // Byte order used by both decode() and encode().
+ bool m_littleEndian;
+ // True when the previous decode() call ended in the middle of a code unit.
+ bool m_haveBufferedByte;
+ // The first byte of that incomplete code unit; valid only if m_haveBufferedByte.
+ unsigned char m_bufferedByte;
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecUTF16_h
--- /dev/null
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextDecoder.h"
+
+#include "TextEncodingRegistry.h"
+
+// FIXME: Would be nice to also handle BOM for UTF-7 and UTF-32.
+
+namespace WebCore {
+
+// Stores the requested encoding and starts in the "BOM not yet checked" state
+// with no buffered bytes. m_codec is not set here; checkForBOM() creates it.
+TextDecoder::TextDecoder(const TextEncoding& encoding)
+ : m_encoding(encoding)
+ , m_checkedForBOM(false)
+ , m_numBufferedBytes(0)
+{
+}
+
+// Returns the decoder to its freshly-constructed state for a new encoding:
+// the current codec is cleared, buffered bytes are forgotten, and the BOM check
+// will run again on the next decode() call.
+void TextDecoder::reset(const TextEncoding& encoding)
+{
+ m_encoding = encoding;
+ m_codec.clear();
+ m_checkedForBOM = false;
+ m_numBufferedBytes = 0;
+}
+
+// Slow path of decode(), used until the codec has been chosen.
+//
+// Examines the first bytes of the stream (bytes buffered by earlier calls
+// followed by `data`) for a byte order mark:
+//   FF FE    -> UTF-16 little endian
+//   FE FF    -> UTF-16 big endian
+//   EF BB BF -> UTF-8
+// A BOM overrides m_encoding for codec selection. If no BOM has been seen yet,
+// `flush` is false, and everything received so far still fits in
+// m_bufferedBytes, the bytes are stashed and an empty string is returned so the
+// decision can be made on a later call.
+//
+// Once the decision is made the codec is created and the buffered bytes plus
+// `data` are decoded. If no codec can be created, a null String is returned and
+// the BOM check is left pending.
+String TextDecoder::checkForBOM(const char* data, size_t length, bool flush)
+{
+    // Check to see if we found a BOM.
+    size_t numBufferedBytes = m_numBufferedBytes;
+    size_t buf1Len = numBufferedBytes;
+    size_t buf2Len = length;
+    const unsigned char* buf1 = m_bufferedBytes;
+    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);
+    // Pull up to three leading bytes, draining the buffered bytes before the new
+    // data; missing bytes read as 0. The buffer holds at most two bytes, so the
+    // third byte can only come from `data`.
+    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+    unsigned char c3 = buf2Len ? (--buf2Len, *buf2++) : 0;
+
+    const TextEncoding* encodingConsideringBOM = &m_encoding;
+    if (c1 == 0xFF && c2 == 0xFE)
+        encodingConsideringBOM = &UTF16LittleEndianEncoding();
+    else if (c1 == 0xFE && c2 == 0xFF)
+        encodingConsideringBOM = &UTF16BigEndianEncoding();
+    else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF)
+        encodingConsideringBOM = &UTF8Encoding();
+    else if (numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
+        // Continue to look for the BOM.
+        memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
+        m_numBufferedBytes += length;
+        return "";
+    }
+
+    // Done checking for BOM.
+    m_codec.set(newTextCodec(*encodingConsideringBOM).release());
+    if (!m_codec)
+        return String();
+    m_checkedForBOM = true;
+
+    // Handle case where we have some buffered bytes to deal with.
+    if (numBufferedBytes) {
+        char bufferedBytes[sizeof(m_bufferedBytes)];
+        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
+        m_numBufferedBytes = 0;
+
+        // The codec is stateful (e.g. the UTF-16 codec carries a dangling byte
+        // between calls), so the buffered prefix must be decoded before `data`.
+        // The operands of operator+ are not sequenced relative to each other,
+        // so do the first decode in its own statement instead of relying on
+        // left-to-right evaluation.
+        String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false);
+        return bufferedResult + m_codec->decode(data, length, flush);
+    }
+
+    return m_codec->decode(data, length, flush);
+}
+
+} // namespace WebCore
--- /dev/null
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextDecoder_h
+#define TextDecoder_h
+
+#include "PlatformString.h"
+#include "StreamingTextDecoder.h"
+#include "TextEncoding.h"
+#include <wtf/OwnPtr.h>
+
+namespace WebCore {
+
+ class TextCodec;
+
+ // Streaming decoder front end: picks the codec for a byte stream (letting a
+ // leading byte order mark override the requested encoding) and then forwards
+ // every chunk to that codec.
+ class TextDecoder {
+ public:
+ TextDecoder(const TextEncoding&);
+ void reset(const TextEncoding&);
+ // Returns the encoding passed to the constructor or reset(); a BOM found in
+ // the data does not change this value.
+ const TextEncoding& encoding() const { return m_encoding; };
+
+ // Decodes the next chunk. Until the BOM check has completed, the work is
+ // delegated to checkForBOM(), which also creates m_codec.
+ String decode(const char* data, size_t length, bool flush = false)
+ {
+ if (!m_checkedForBOM)
+ return checkForBOM(data, length, flush);
+ return m_codec->decode(data, length, flush);
+ }
+
+ private:
+ String checkForBOM(const char*, size_t length, bool flush);
+
+ TextEncoding m_encoding;
+ OwnPtr<TextCodec> m_codec;
+
+ // Set once checkForBOM() has created the codec.
+ bool m_checkedForBOM;
+ // Leading bytes held back while there is not yet enough data to rule a BOM
+ // in or out; m_numBufferedBytes counts how many entries are in use.
+ unsigned char m_numBufferedBytes;
+ unsigned char m_bufferedBytes[2];
+ };
+
+} // namespace WebCore
+
+#endif // TextDecoder_h
/*
* Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#include "config.h"
#include "TextEncoding.h"
-#include "CharsetNames.h"
+#include "CString.h"
+#include "PlatformString.h"
#include "StreamingTextDecoder.h"
-
+#include "TextDecoder.h"
+#include "TextEncodingRegistry.h"
+#include <unicode/unorm.h>
+#include <wtf/HashSet.h>
#include <wtf/OwnPtr.h>
namespace WebCore {
-TextEncoding::TextEncoding(const char* name, bool eightBitOnly)
+// Inserts the atomic canonical form of `name` into `set`. Names for which
+// atomicCanonicalTextEncodingName() returns null are silently skipped.
+static void addEncodingName(HashSet<const char*>& set, const char* name)
+{
+    if (const char* canonicalName = atomicCanonicalTextEncodingName(name))
+        set.add(canonicalName);
+}
+
+// Builds an encoding from a C-string name. m_name holds whatever
+// atomicCanonicalTextEncodingName() returns for it; other members test m_name
+// for null before using it, so that lookup can evidently yield null.
+TextEncoding::TextEncoding(const char* name)
+ : m_name(atomicCanonicalTextEncodingName(name))
+{
+}
+
+// Same as the const char* constructor, but looks the name up from the String's
+// character buffer and length.
+TextEncoding::TextEncoding(const String& name)
+ : m_name(atomicCanonicalTextEncodingName(name.characters(), name.length()))
+{
+}
+
+// One-shot decode of a complete buffer: runs a temporary TextDecoder for this
+// encoding with flush = true. Returns a null String when this encoding has no
+// name (m_name is null).
+String TextEncoding::decode(const char* data, size_t length) const
+{
+    if (m_name) {
+        TextDecoder oneShotDecoder(*this);
+        return oneShotDecoder.decode(data, length, true);
+    }
+
+    return String();
+}
+
+// Encodes `length` UTF-16 code units into this encoding's byte form.
+// Returns a null CString when this encoding has no name, and "" for empty input.
+// Otherwise the text is brought into Unicode NFC (via ICU) when it is not
+// already known to be normalized, then handed to a freshly created codec.
+CString TextEncoding::encode(const UChar* characters, size_t length, bool allowEntities) const
{
- m_encodingID = textEncodingIDFromCharsetName(name, &m_flags);
- if (eightBitOnly && m_encodingID == UTF16Encoding)
- m_encodingID = UTF8Encoding;
+ if (!m_name)
+ return CString();
+
+ if (!length)
+ return "";
+
+ // FIXME: What's the right place to do normalization?
+ // It's a little strange to do it inside the encode function.
+ // Perhaps normalization should be an explicit step done before calling encode.
+
+ // source/sourceLength describe the text actually encoded: the caller's buffer
+ // by default, or normalizedCharacters if normalization was needed.
+ const UChar* source = characters;
+ size_t sourceLength = length;
+
+ Vector<UChar> normalizedCharacters;
+
+ UErrorCode err = U_ZERO_ERROR;
+ if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) {
+ // First try using the length of the original string, since normalization to NFC rarely increases length.
+ normalizedCharacters.resize(sourceLength);
+ int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
+ // On overflow the first call's return value is used as the required size:
+ // grow the buffer to exactly that and normalize again.
+ if (err == U_BUFFER_OVERFLOW_ERROR) {
+ err = U_ZERO_ERROR;
+ normalizedCharacters.resize(normalizedLength);
+ normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
+ }
+ ASSERT(U_SUCCESS(err));
+
+ source = normalizedCharacters.data();
+ sourceLength = normalizedLength;
+ }
+
+ // NOTE(review): the codec returned by newTextCodec() is dereferenced without a
+ // null check, while TextDecoder::checkForBOM does check -- confirm a non-null
+ // m_name guarantees a codec.
+ return newTextCodec(*this)->encode(source, sourceLength, allowEntities);
}
-TextEncoding TextEncoding::effectiveEncoding() const
+// True only when this encoding's name is the atomic canonical name looked up
+// for "ISO-8859-8". The names are compared with ==, not by content, which
+// presumably relies on atomic names being unique pointers -- confirm against
+// m_name's declared type.
+bool TextEncoding::usesVisualOrdering() const
{
- TextEncodingID id = m_encodingID;
- if (id == Latin1Encoding || id == ASCIIEncoding)
- id = WinLatin1Encoding;
- return TextEncoding(id, m_flags);
+ // Looked up once and cached for the lifetime of the program.
+ static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8");
+ return m_name == a;
}
-const char* TextEncoding::name() const
+// Reports whether this encoding is one of the Japanese encodings listed below.
+// Returns false when the encoding has no name.
+bool TextEncoding::isJapanese() const
{
- return charsetNameFromTextEncodingID(m_encodingID);
+ // Set of atomic canonical names, filled in on the first call. If none of the
+ // names resolve, the set stays empty and is rebuilt on every call.
+ static HashSet<const char*> set;
+ if (set.isEmpty()) {
+ addEncodingName(set, "x-mac-japanese");
+ addEncodingName(set, "cp932");
+ addEncodingName(set, "JIS_X0201");
+ addEncodingName(set, "JIS_X0208-1983");
+ addEncodingName(set, "JIS_X0208-1990");
+ addEncodingName(set, "JIS_X0212-1990");
+ addEncodingName(set, "JIS_C6226-1978");
+ addEncodingName(set, "Shift_JIS_X0213-2000");
+ addEncodingName(set, "ISO-2022-JP");
+ addEncodingName(set, "ISO-2022-JP-2");
+ addEncodingName(set, "ISO-2022-JP-1");
+ addEncodingName(set, "ISO-2022-JP-3");
+ addEncodingName(set, "EUC-JP");
+ addEncodingName(set, "Shift_JIS");
+ }
+ return m_name && set.contains(m_name);
}
UChar TextEncoding::backslashAsCurrencySymbol() const
{
- if (m_flags & BackslashIsYen)
- return 0x00A5; // yen sign
-
- return '\\';
+ // Returns 0x00A5 (the yen sign) when this encoding's name is the atomic
+ // canonical name for "Shift_JIS_X0213-2000" or "EUC-JP", and a plain
+ // backslash for every other encoding. Both names are looked up once and cached.
+ static const char* const a = atomicCanonicalTextEncodingName("Shift_JIS_X0213-2000");
+ static const char* const b = atomicCanonicalTextEncodingName("EUC-JP");
+ return (m_name == a || m_name == b) ? 0x00A5 : '\\';
+}
+
+// Maps either UTF-16 byte order to UTF-8; every other encoding is returned
+// unchanged (as a reference to this object).
+const TextEncoding& TextEncoding::closest8BitEquivalent() const
+{
+    const bool isUTF16 = *this == UTF16BigEndianEncoding() || *this == UTF16LittleEndianEncoding();
+    if (!isUTF16)
+        return *this;
+
+    return UTF8Encoding();
+}
+
+// Returns a reference to a function-local static TextEncoding constructed from
+// the name "ASCII"; the same object is returned on every call.
+const TextEncoding& ASCIIEncoding()
+{
+ static TextEncoding globalASCIIEncoding("ASCII");
+ return globalASCIIEncoding;
+}
+
+// Returns a reference to a function-local static TextEncoding constructed from
+// the name "Latin-1"; the same object is returned on every call.
+const TextEncoding& Latin1Encoding()
+{
+ static TextEncoding globalLatin1Encoding("Latin-1");
+ return globalLatin1Encoding;
+}
+
+// Returns a reference to a function-local static TextEncoding constructed from
+// the name "UTF-16BE"; the same object is returned on every call.
+const TextEncoding& UTF16BigEndianEncoding()
+{
+ static TextEncoding globalUTF16BigEndianEncoding("UTF-16BE");
+ return globalUTF16BigEndianEncoding;
+}
+
+// Returns a reference to a function-local static TextEncoding constructed from
+// the name "UTF-16LE"; the same object is returned on every call.
+const TextEncoding& UTF16LittleEndianEncoding()
+{
+ static TextEncoding globalUTF16LittleEndianEncoding("UTF-16LE");
+ return globalUTF16LittleEndianEncoding;
}
-DeprecatedString TextEncoding::toUnicode(const char* chs, int len) const
+const TextEncoding& UTF8Encoding()
{
- OwnPtr<StreamingTextDecoder> decoder(StreamingTextDecoder::create(*this));
- return decoder->toUnicode(chs, len, true);
+ static&nb