fourthTier: String::utf8() should also be available as StringImpl::utf8() so that you don't have to ref() a StringImpl just to get its utf8()
author oliver@apple.com <oliver@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 25 Jul 2013 03:59:12 +0000 (03:59 +0000)
committer oliver@apple.com <oliver@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 25 Jul 2013 03:59:12 +0000 (03:59 +0000)
https://bugs.webkit.org/show_bug.cgi?id=115393

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

* runtime/JSGlobalObjectFunctions.cpp:
(JSC::encode):

Source/WebCore:

No new tests because no new behavior.

* Modules/websockets/WebSocket.cpp:
(WebCore::WebSocket::close):
* Modules/websockets/WebSocketChannel.cpp:
(WebCore::WebSocketChannel::send):
* html/MediaFragmentURIParser.cpp:
(WebCore::MediaFragmentURIParser::parseFragments):

Source/WTF:

* WTF.xcodeproj/project.pbxproj:
* wtf/text/ConversionMode.h: Added.
(WTF):
* wtf/text/StringImpl.cpp:
(WTF):
(WTF::putUTF8Triple):
(WTF::StringImpl::utf8):
* wtf/text/StringImpl.h:
(StringImpl):
* wtf/text/WTFString.cpp:
(WTF):
(WTF::String::utf8):
* wtf/text/WTFString.h:
(String):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@153135 268f45cc-cd09-0410-ab3c-d52691b4dbfc

13 files changed:
Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/runtime/JSGlobalObjectFunctions.cpp
Source/WTF/ChangeLog
Source/WTF/WTF.xcodeproj/project.pbxproj
Source/WTF/wtf/text/ConversionMode.h [new file with mode: 0644]
Source/WTF/wtf/text/StringImpl.cpp
Source/WTF/wtf/text/StringImpl.h
Source/WTF/wtf/text/WTFString.cpp
Source/WTF/wtf/text/WTFString.h
Source/WebCore/ChangeLog
Source/WebCore/Modules/websockets/WebSocket.cpp
Source/WebCore/Modules/websockets/WebSocketChannel.cpp
Source/WebCore/html/MediaFragmentURIParser.cpp

index ffc1411..9f98129 100644 (file)
@@ -1,3 +1,13 @@
+2013-04-29  Filip Pizlo  <fpizlo@apple.com>
+
+        fourthTier: String::utf8() should also be available as StringImpl::utf8() so that you don't have to ref() a StringImpl just to get its utf8()
+        https://bugs.webkit.org/show_bug.cgi?id=115393
+
+        Reviewed by Geoffrey Garen.
+
+        * runtime/JSGlobalObjectFunctions.cpp:
+        (JSC::encode):
+
 2013-07-16  Oliver Hunt <oliver@apple.com>
 
         Merge dfgFourthTier r149301
index 0efaf84..e09223e 100644 (file)
@@ -53,7 +53,7 @@ namespace JSC {
 
 static JSValue encode(ExecState* exec, const char* doNotEscape)
 {
-    CString cstr = exec->argument(0).toString(exec)->value(exec).utf8(String::StrictConversion);
+    CString cstr = exec->argument(0).toString(exec)->value(exec).utf8(StrictConversion);
     if (!cstr.data())
         return throwError(exec, createURIError(exec, ASCIILiteral("String contained an illegal UTF-16 sequence.")));
 
index 339eb0c..11192c1 100644 (file)
@@ -1,3 +1,25 @@
+2013-04-29  Filip Pizlo  <fpizlo@apple.com>
+
+        fourthTier: String::utf8() should also be available as StringImpl::utf8() so that you don't have to ref() a StringImpl just to get its utf8()
+        https://bugs.webkit.org/show_bug.cgi?id=115393
+
+        Reviewed by Geoffrey Garen.
+
+        * WTF.xcodeproj/project.pbxproj:
+        * wtf/text/ConversionMode.h: Added.
+        (WTF):
+        * wtf/text/StringImpl.cpp:
+        (WTF):
+        (WTF::putUTF8Triple):
+        (WTF::StringImpl::utf8):
+        * wtf/text/StringImpl.h:
+        (StringImpl):
+        * wtf/text/WTFString.cpp:
+        (WTF):
+        (WTF::String::utf8):
+        * wtf/text/WTFString.h:
+        (String):
+
 2013-07-16  Oliver Hunt <oliver@apple.com>
 
         Merge dfgFourthTier r149301
index 9c3f3de..303ea53 100644 (file)
@@ -23,6 +23,7 @@
 /* Begin PBXBuildFile section */
                0F0D85B417234CC100338210 /* NoLock.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F0D85B317234CB100338210 /* NoLock.h */; };
                0F87105A16643F190090B0AD /* RawPointer.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F87105916643F190090B0AD /* RawPointer.h */; };
+               0F8F2B9C172F2596007DBDA5 /* ConversionMode.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F8F2B9B172F2594007DBDA5 /* ConversionMode.h */; };
                0F8F2B91172E00FC007DBDA5 /* CompilationThread.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F8F2B90172E00F0007DBDA5 /* CompilationThread.h */; };
                0F8F2B92172E0103007DBDA5 /* CompilationThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F8F2B8F172E00F0007DBDA5 /* CompilationThread.cpp */; };
                0F9D3360165DBA73005AD387 /* FilePrintStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F9D335B165DBA73005AD387 /* FilePrintStream.cpp */; };
 /* Begin PBXFileReference section */
                0F0D85B317234CB100338210 /* NoLock.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = NoLock.h; sourceTree = "<group>"; };
                0F87105916643F190090B0AD /* RawPointer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RawPointer.h; sourceTree = "<group>"; };
+               0F8F2B9B172F2594007DBDA5 /* ConversionMode.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ConversionMode.h; sourceTree = "<group>"; };
                0F8F2B8F172E00F0007DBDA5 /* CompilationThread.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = CompilationThread.cpp; sourceTree = "<group>"; };
                0F8F2B90172E00F0007DBDA5 /* CompilationThread.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CompilationThread.h; sourceTree = "<group>"; };
                0F9D335B165DBA73005AD387 /* FilePrintStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FilePrintStream.cpp; sourceTree = "<group>"; };
                A8A4731B151A825B004123FF /* text */ = {
                        isa = PBXGroup;
                        children = (
+                               0F8F2B9B172F2594007DBDA5 /* ConversionMode.h */,
                                A8A4731C151A825B004123FF /* ASCIIFastPath.h */,
                                A8A4731D151A825B004123FF /* AtomicString.cpp */,
                                A8A4731E151A825B004123FF /* AtomicString.h */,
                                A8A47429151A825B004123FF /* StaticConstructors.h in Headers */,
                                A8A4742A151A825B004123FF /* StdLibExtras.h in Headers */,
                                1A6BB769162F300500DD16DB /* StreamBuffer.h in Headers */,
+                               0F8F2B9C172F2596007DBDA5 /* ConversionMode.h in Headers */,
                                A8A4743B151A825B004123FF /* StringBuffer.h in Headers */,
                                A8A4743D151A825B004123FF /* StringBuilder.h in Headers */,
                                A8A4743E151A825B004123FF /* StringConcatenate.h in Headers */,
diff --git a/Source/WTF/wtf/text/ConversionMode.h b/Source/WTF/wtf/text/ConversionMode.h
new file mode 100644 (file)
index 0000000..7c17272
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2013 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef ConversionMode_h
+#define ConversionMode_h
+
+namespace WTF {
+
+typedef enum {
+    LenientConversion,
+    StrictConversion,
+    StrictConversionReplacingUnpairedSurrogatesWithFFFD,
+} ConversionMode;
+
+} // namespace WTF
+
+using WTF::ConversionMode;
+using WTF::LenientConversion;
+using WTF::StrictConversion;
+using WTF::StrictConversionReplacingUnpairedSurrogatesWithFFFD;
+
+#endif // ConversionMode_h
+
index d1a3b56..be56280 100644 (file)
@@ -31,7 +31,9 @@
 #include <wtf/ProcessID.h>
 #include <wtf/StdLibExtras.h>
 #include <wtf/WTFThreadData.h>
+#include <wtf/text/CString.h>
 #include <wtf/unicode/CharacterNames.h>
+#include <wtf/unicode/UTF8.h>
 
 #ifdef STRING_STATS
 #include <unistd.h>
@@ -1948,4 +1950,92 @@ size_t StringImpl::sizeInBytes() const
     return size + sizeof(*this);
 }
 
+// Helper that writes a UChar to the buffer as a three-byte UTF-8 sequence; the caller must check that room is available.
+static inline void putUTF8Triple(char*& buffer, UChar ch)
+{
+    ASSERT(ch >= 0x0800);
+    *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
+    *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
+    *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
+}
+
+CString StringImpl::utf8(ConversionMode mode) const
+{
+    unsigned length = this->length();
+
+    if (!length)
+        return CString("", 0);
+
+    // Allocate a buffer big enough to hold all the characters
+    // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
+    // Optimization ideas, if we find this function is hot:
+    //  * We could speculatively create a CStringBuffer to contain 'length' 
+    //    characters, and resize if necessary (i.e. if the buffer contains
+    //    non-ascii characters). (Alternatively, scan the buffer first for
+    //    ascii characters, so we know this will be sufficient).
+    //  * We could allocate a CStringBuffer with an appropriate size to
+    //    have a good chance of being able to write the string into the
+    //    buffer without reallocing (say, 1.5 x length).
+    if (length > numeric_limits<unsigned>::max() / 3)
+        return CString();
+    Vector<char, 1024> bufferVector(length * 3);
+
+    char* buffer = bufferVector.data();
+
+    if (is8Bit()) {
+        const LChar* characters = this->characters8();
+
+        ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
+        ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
+    } else {
+        const UChar* characters = this->characters16();
+
+        if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {
+            const UChar* charactersEnd = characters + length;
+            char* bufferEnd = buffer + bufferVector.size();
+            while (characters < charactersEnd) {
+                // Use strict conversion to detect unpaired surrogates.
+                ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true);
+                ASSERT(result != targetExhausted);
+                // Conversion fails when there is an unpaired surrogate.
+                // Put replacement character (U+FFFD) instead of the unpaired surrogate.
+                if (result != conversionOK) {
+                    ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));
+                    // There should be room left, since one UChar hasn't been converted.
+                    ASSERT((buffer + 3) <= bufferEnd);
+                    putUTF8Triple(buffer, replacementCharacter);
+                    ++characters;
+                }
+            }
+        } else {
+            bool strict = mode == StrictConversion;
+            ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
+            ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
+
+            // Only produced from strict conversion.
+            if (result == sourceIllegal) {
+                ASSERT(strict);
+                return CString();
+            }
+
+            // Check for an unconverted high surrogate.
+            if (result == sourceExhausted) {
+                if (strict)
+                    return CString();
+                // This should be one unpaired high surrogate. Treat it the same
+                // way as an unpaired high surrogate would have been handled in
+                // the middle of a string with non-strict conversion - which is
+                // to say, simply encode it to UTF-8.
+                ASSERT((characters + 1) == (this->characters() + length));
+                ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
+                // There should be room left, since one UChar hasn't been converted.
+                ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
+                putUTF8Triple(buffer, *characters);
+            }
+        }
+    }
+
+    return CString(bufferVector.data(), buffer - bufferVector.data());
+}
+
 } // namespace WTF
index 0d6c358..c0f0c59 100644 (file)
@@ -29,6 +29,7 @@
 #include <wtf/StdLibExtras.h>
 #include <wtf/StringHasher.h>
 #include <wtf/Vector.h>
+#include <wtf/text/ConversionMode.h>
 #include <wtf/unicode/Unicode.h>
 
 #if PLATFORM(QT)
@@ -546,6 +547,8 @@ public:
 #if PLATFORM(QT)
     QStringData* qStringData() { return bufferOwnership() == BufferAdoptedQString ? m_qStringData : 0; }
 #endif
+    
+    WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
 
 private:
     // The high bits of 'hash' are always empty, but we prefer to store our flags
index 29c13c1..5ae5699 100644 (file)
@@ -792,92 +792,12 @@ CString String::latin1() const
     return result;
 }
 
-// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
-static inline void putUTF8Triple(char*& buffer, UChar ch)
-{
-    ASSERT(ch >= 0x0800);
-    *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
-    *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
-    *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
-}
-
 CString String::utf8(ConversionMode mode) const
 {
-    unsigned length = this->length();
-
-    if (!length)
+    if (!m_impl)
         return CString("", 0);
-
-    // Allocate a buffer big enough to hold all the characters
-    // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
-    // Optimization ideas, if we find this function is hot:
-    //  * We could speculatively create a CStringBuffer to contain 'length' 
-    //    characters, and resize if necessary (i.e. if the buffer contains
-    //    non-ascii characters). (Alternatively, scan the buffer first for
-    //    ascii characters, so we know this will be sufficient).
-    //  * We could allocate a CStringBuffer with an appropriate size to
-    //    have a good chance of being able to write the string into the
-    //    buffer without reallocing (say, 1.5 x length).
-    if (length > numeric_limits<unsigned>::max() / 3)
-        return CString();
-    Vector<char, 1024> bufferVector(length * 3);
-
-    char* buffer = bufferVector.data();
-
-    if (is8Bit()) {
-        const LChar* characters = this->characters8();
-
-        ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
-        ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
-    } else {
-        const UChar* characters = this->characters16();
-
-        if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {
-            const UChar* charactersEnd = characters + length;
-            char* bufferEnd = buffer + bufferVector.size();
-            while (characters < charactersEnd) {
-                // Use strict conversion to detect unpaired surrogates.
-                ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true);
-                ASSERT(result != targetExhausted);
-                // Conversion fails when there is an unpaired surrogate.
-                // Put replacement character (U+FFFD) instead of the unpaired surrogate.
-                if (result != conversionOK) {
-                    ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));
-                    // There should be room left, since one UChar hasn't been converted.
-                    ASSERT((buffer + 3) <= bufferEnd);
-                    putUTF8Triple(buffer, replacementCharacter);
-                    ++characters;
-                }
-            }
-        } else {
-            bool strict = mode == StrictConversion;
-            ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
-            ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
-
-            // Only produced from strict conversion.
-            if (result == sourceIllegal) {
-                ASSERT(strict);
-                return CString();
-            }
-
-            // Check for an unconverted high surrogate.
-            if (result == sourceExhausted) {
-                if (strict)
-                    return CString();
-                // This should be one unpaired high surrogate. Treat it the same
-                // was as an unpaired high surrogate would have been handled in
-                // the middle of a string with non-strict conversion - which is
-                // to say, simply encode it to UTF-8.
-                ASSERT((characters + 1) == (this->characters() + length));
-                ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
-                // There should be room left, since one UChar hasn't been converted.
-                ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
-                putUTF8Triple(buffer, *characters);
-            }
-        }
-    }
-
-    return CString(bufferVector.data(), buffer - bufferVector.data());
+    
+    return m_impl->utf8(mode);
 }
 
 String String::make8BitFrom16BitSource(const UChar* source, size_t length)
index 1c8e909..9b5e6ed 100644 (file)
@@ -214,12 +214,6 @@ public:
     WTF_EXPORT_STRING_API CString ascii() const;
     WTF_EXPORT_STRING_API CString latin1() const;
 
-    typedef enum {
-        LenientConversion,
-        StrictConversion,
-        StrictConversionReplacingUnpairedSurrogatesWithFFFD,
-    } ConversionMode;
-
     WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
 
     UChar operator[](unsigned index) const
index 157020c..bbe01fb 100644 (file)
@@ -1,3 +1,19 @@
+2013-04-29  Filip Pizlo  <fpizlo@apple.com>
+
+        fourthTier: String::utf8() should also be available as StringImpl::utf8() so that you don't have to ref() a StringImpl just to get its utf8()
+        https://bugs.webkit.org/show_bug.cgi?id=115393
+
+        Reviewed by Geoffrey Garen.
+
+        No new tests because no new behavior.
+
+        * Modules/websockets/WebSocket.cpp:
+        (WebCore::WebSocket::close):
+        * Modules/websockets/WebSocketChannel.cpp:
+        (WebCore::WebSocketChannel::send):
+        * html/MediaFragmentURIParser.cpp:
+        (WebCore::MediaFragmentURIParser::parseFragments):
+
 2013-07-24  Simon Fraser  <simon.fraser@apple.com>
 
         [iOS] Captions are clipped in documents using pagination
index ce9d2b0..b4b49c1 100644 (file)
@@ -378,7 +378,7 @@ void WebSocket::close(int code, const String& reason, ExceptionCode& ec)
             ec = INVALID_ACCESS_ERR;
             return;
         }
-        CString utf8 = reason.utf8(String::StrictConversionReplacingUnpairedSurrogatesWithFFFD);
+        CString utf8 = reason.utf8(StrictConversionReplacingUnpairedSurrogatesWithFFFD);
         if (utf8.length() > maxReasonSizeInBytes) {
             scriptExecutionContext()->addConsoleMessage(JSMessageSource, ErrorMessageLevel, "WebSocket close message is too long.");
             ec = SYNTAX_ERR;
index 0a3ff0c..5be99a9 100644 (file)
@@ -139,7 +139,7 @@ String WebSocketChannel::extensions()
 ThreadableWebSocketChannel::SendResult WebSocketChannel::send(const String& message)
 {
     LOG(Network, "WebSocketChannel %p send() Sending String '%s'", this, message.utf8().data());
-    CString utf8 = message.utf8(String::StrictConversionReplacingUnpairedSurrogatesWithFFFD);
+    CString utf8 = message.utf8(StrictConversionReplacingUnpairedSurrogatesWithFFFD);
     enqueueTextFrame(utf8);
     processOutgoingFrameQueue();
     // According to WebSocket API specification, WebSocket.send() should return void instead
index bd09181..3959dda 100644 (file)
@@ -141,11 +141,11 @@ void MediaFragmentURIParser::parseFragments()
         //     name or value are not valid UTF-8 strings, then remove the name-value pair from the list.
         bool validUTF8 = true;
         if (!name.isEmpty()) {
-            name = name.utf8(String::StrictConversion).data();
+            name = name.utf8(StrictConversion).data();
             validUTF8 = !name.isEmpty();
         }
         if (validUTF8 && !value.isEmpty()) {
-            value = value.utf8(String::StrictConversion).data();
+            value = value.utf8(StrictConversion).data();
             validUTF8 = !value.isEmpty();
         }