Unreviewed, rolling out r209058 and r209074.
author: commit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 30 Nov 2016 04:54:04 +0000 (04:54 +0000)
committer: commit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 30 Nov 2016 04:54:04 +0000 (04:54 +0000)
https://bugs.webkit.org/show_bug.cgi?id=165188

These changes caused API test StringBuilderTest.Equal to crash
and/or fail. (Requested by ryanhaddad on #webkit).

Reverted changesets:

"Streamline and speed up tokenizer and segmented string
classes"
https://bugs.webkit.org/show_bug.cgi?id=165003
http://trac.webkit.org/changeset/209058

"REGRESSION (r209058): API test StringBuilderTest.Equal
crashing"
https://bugs.webkit.org/show_bug.cgi?id=165142
http://trac.webkit.org/changeset/209074

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@209120 268f45cc-cd09-0410-ab3c-d52691b4dbfc

40 files changed:
Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/runtime/JSONObject.cpp
Source/WTF/ChangeLog
Source/WTF/wtf/text/StringBuilder.cpp
Source/WTF/wtf/text/StringBuilder.h
Source/WebCore/ChangeLog
Source/WebCore/bindings/js/JSHTMLDocumentCustom.cpp
Source/WebCore/css/parser/CSSTokenizer.cpp
Source/WebCore/css/parser/CSSTokenizer.h
Source/WebCore/css/parser/CSSTokenizerInputStream.h
Source/WebCore/dom/Document.cpp
Source/WebCore/dom/Document.h
Source/WebCore/dom/DocumentParser.h
Source/WebCore/dom/RawDataDocumentParser.h
Source/WebCore/html/FTPDirectoryDocument.cpp
Source/WebCore/html/parser/HTMLDocumentParser.cpp
Source/WebCore/html/parser/HTMLDocumentParser.h
Source/WebCore/html/parser/HTMLEntityParser.cpp
Source/WebCore/html/parser/HTMLInputStream.h
Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
Source/WebCore/html/parser/HTMLSourceTracker.cpp
Source/WebCore/html/parser/HTMLSourceTracker.h
Source/WebCore/html/parser/HTMLTokenizer.cpp
Source/WebCore/html/parser/InputStreamPreprocessor.h
Source/WebCore/html/track/BufferedLineReader.cpp
Source/WebCore/html/track/BufferedLineReader.h
Source/WebCore/html/track/InbandGenericTextTrack.cpp
Source/WebCore/html/track/InbandGenericTextTrack.h
Source/WebCore/html/track/InbandTextTrack.h
Source/WebCore/html/track/WebVTTParser.cpp
Source/WebCore/html/track/WebVTTParser.h
Source/WebCore/html/track/WebVTTTokenizer.cpp
Source/WebCore/platform/graphics/InbandTextTrackPrivateClient.h
Source/WebCore/platform/text/SegmentedString.cpp
Source/WebCore/platform/text/SegmentedString.h
Source/WebCore/xml/parser/CharacterReferenceParserInlines.h
Source/WebCore/xml/parser/MarkupTokenizerInlines.h
Source/WebCore/xml/parser/XMLDocumentParser.cpp
Source/WebCore/xml/parser/XMLDocumentParser.h
Source/WebCore/xml/parser/XMLDocumentParserLibxml2.cpp

index 4446b5c..cb8ab73 100644 (file)
@@ -1,3 +1,23 @@
+2016-11-29  Commit Queue  <commit-queue@webkit.org>
+
+        Unreviewed, rolling out r209058 and r209074.
+        https://bugs.webkit.org/show_bug.cgi?id=165188
+
+        These changes caused API test StringBuilderTest.Equal to crash
+        and/or fail. (Requested by ryanhaddad on #webkit).
+
+        Reverted changesets:
+
+        "Streamline and speed up tokenizer and segmented string
+        classes"
+        https://bugs.webkit.org/show_bug.cgi?id=165003
+        http://trac.webkit.org/changeset/209058
+
+        "REGRESSION (r209058): API test StringBuilderTest.Equal
+        crashing"
+        https://bugs.webkit.org/show_bug.cgi?id=165142
+        http://trac.webkit.org/changeset/209074
+
 2016-11-29  Caitlin Potter  <caitp@igalia.com>
 
         [JSC] always wrap AwaitExpression operand in a new Promise
index 39221d3..2957ae2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2009–2016 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -354,7 +354,7 @@ Stringifier::StringifyResult Stringifier::appendStringifiedValue(StringBuilder&
     }
 
     if (value.isString()) {
-        builder.appendQuotedJSONString(asString(value)->viewWithUnderlyingString(*m_exec).view);
+        builder.appendQuotedJSONString(asString(value)->value(m_exec));
         return StringifySucceeded;
     }
 
index cfce5e4..71c33f8 100644 (file)
@@ -1,3 +1,23 @@
+2016-11-29  Commit Queue  <commit-queue@webkit.org>
+
+        Unreviewed, rolling out r209058 and r209074.
+        https://bugs.webkit.org/show_bug.cgi?id=165188
+
+        These changes caused API test StringBuilderTest.Equal to crash
+        and/or fail. (Requested by ryanhaddad on #webkit).
+
+        Reverted changesets:
+
+        "Streamline and speed up tokenizer and segmented string
+        classes"
+        https://bugs.webkit.org/show_bug.cgi?id=165003
+        http://trac.webkit.org/changeset/209058
+
+        "REGRESSION (r209058): API test StringBuilderTest.Equal
+        crashing"
+        https://bugs.webkit.org/show_bug.cgi?id=165142
+        http://trac.webkit.org/changeset/209074
+
 2016-11-29  Simon Fraser  <simon.fraser@apple.com>
 
         Allow TracePoint to take arbitrary data
index cfa0c21..afe1341 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2010, 2013, 2016 Apple Inc. All rights reserved.
  * Copyright (C) 2012 Google Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,7 @@
 
 #include "IntegerToStringConversion.h"
 #include "MathExtras.h"
+#include "WTFString.h"
 #include <wtf/dtoa.h>
 
 namespace WTF {
@@ -39,18 +40,6 @@ static unsigned expandedCapacity(unsigned capacity, unsigned requiredLength)
     return std::max(requiredLength, std::max(minimumCapacity, capacity * 2));
 }
 
-template<> ALWAYS_INLINE LChar* StringBuilder::bufferCharacters<LChar>()
-{
-    ASSERT(m_is8Bit);
-    return m_bufferCharacters8;
-}
-
-template<> ALWAYS_INLINE UChar* StringBuilder::bufferCharacters<UChar>()
-{
-    ASSERT(!m_is8Bit);
-    return m_bufferCharacters16;
-}
-
 void StringBuilder::reifyString() const
 {
     // Check if the string already exists.
@@ -108,7 +97,6 @@ void StringBuilder::resize(unsigned newSize)
 void StringBuilder::allocateBuffer(const LChar* currentCharacters, unsigned requiredLength)
 {
     ASSERT(m_is8Bit);
-
     // Copy the existing data into a new buffer, set result to point to the end of the existing data.
     auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8);
     memcpy(m_bufferCharacters8, currentCharacters, static_cast<size_t>(m_length) * sizeof(LChar)); // This can't overflow.
@@ -124,7 +112,6 @@ void StringBuilder::allocateBuffer(const LChar* currentCharacters, unsigned requ
 void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requiredLength)
 {
     ASSERT(!m_is8Bit);
-
     // Copy the existing data into a new buffer, set result to point to the end of the existing data.
     auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
     memcpy(m_bufferCharacters16, currentCharacters, static_cast<size_t>(m_length) * sizeof(UChar)); // This can't overflow.
@@ -137,11 +124,10 @@ void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requ
 
 // Allocate a new 16 bit buffer, copying in currentCharacters (which is 8 bit and may come
 // from either m_string or m_buffer, neither will be reassigned until the copy has completed).
-void StringBuilder::allocateBufferUpconvert(const LChar* currentCharacters, unsigned requiredLength)
+void StringBuilder::allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength)
 {
     ASSERT(m_is8Bit);
     ASSERT(requiredLength >= m_length);
-
     // Copy the existing data into a new buffer, set result to point to the end of the existing data.
     auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
     for (unsigned i = 0; i < m_length; ++i)
@@ -155,7 +141,8 @@ void StringBuilder::allocateBufferUpconvert(const LChar* currentCharacters, unsi
     ASSERT(m_buffer->length() == requiredLength);
 }
 
-template<> void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength)
+template <>
+void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength)
 {
     // If the buffer has only one ref (by this StringBuilder), reallocate it,
     // otherwise fall back to "allocate and copy" method.
@@ -171,14 +158,15 @@ template<> void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength)
     ASSERT(m_buffer->length() == requiredLength);
 }
 
-template<> void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength)
+template <>
+void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength)
 {
     // If the buffer has only one ref (by this StringBuilder), reallocate it,
     // otherwise fall back to "allocate and copy" method.
     m_string = String();
     
     if (m_buffer->is8Bit())
-        allocateBufferUpconvert(m_buffer->characters8(), requiredLength);
+        allocateBufferUpConvert(m_buffer->characters8(), requiredLength);
     else if (m_buffer->hasOneRef())
         m_buffer = StringImpl::reallocate(m_buffer.releaseNonNull(), requiredLength, m_bufferCharacters16);
     else
@@ -200,7 +188,7 @@ void StringBuilder::reserveCapacity(unsigned newCapacity)
         // Grow the string, if necessary.
         if (newCapacity > m_length) {
             if (!m_length) {
-                LChar* nullPlaceholder = nullptr;
+                LChar* nullPlaceholder = 0;
                 allocateBuffer(nullPlaceholder, newCapacity);
             } else if (m_string.is8Bit())
                 allocateBuffer(m_string.characters8(), newCapacity);
@@ -213,7 +201,8 @@ void StringBuilder::reserveCapacity(unsigned newCapacity)
 
 // Make 'length' additional capacity be available in m_buffer, update m_string & m_length,
 // return a pointer to the newly allocated storage.
-template<typename CharacterType> ALWAYS_INLINE CharacterType* StringBuilder::appendUninitialized(unsigned length)
+template <typename CharType>
+ALWAYS_INLINE CharType* StringBuilder::appendUninitialized(unsigned length)
 {
     ASSERT(length);
 
@@ -228,53 +217,35 @@ template<typename CharacterType> ALWAYS_INLINE CharacterType* StringBuilder::app
         unsigned currentLength = m_length;
         m_string = String();
         m_length = requiredLength;
-        return bufferCharacters<CharacterType>() + currentLength;
+        return getBufferCharacters<CharType>() + currentLength;
     }
-
-    return appendUninitializedSlow<CharacterType>(requiredLength);
+    
+    return appendUninitializedSlow<CharType>(requiredLength);
 }
 
 // Make 'length' additional capacity be available in m_buffer, update m_string & m_length,
 // return a pointer to the newly allocated storage.
-template<typename CharacterType> CharacterType* StringBuilder::appendUninitializedSlow(unsigned requiredLength)
+template <typename CharType>
+CharType* StringBuilder::appendUninitializedSlow(unsigned requiredLength)
 {
     ASSERT(requiredLength);
 
     if (m_buffer) {
         // If the buffer is valid it must be at least as long as the current builder contents!
         ASSERT(m_buffer->length() >= m_length);
-        reallocateBuffer<CharacterType>(expandedCapacity(capacity(), requiredLength));
+        
+        reallocateBuffer<CharType>(expandedCapacity(capacity(), requiredLength));
     } else {
         ASSERT(m_string.length() == m_length);
-        allocateBuffer(m_length ? m_string.characters<CharacterType>() : nullptr, expandedCapacity(capacity(), requiredLength));
+        allocateBuffer(m_length ? m_string.characters<CharType>() : 0, expandedCapacity(capacity(), requiredLength));
     }
     
-    auto* result = bufferCharacters<CharacterType>() + m_length;
+    CharType* result = getBufferCharacters<CharType>() + m_length;
     m_length = requiredLength;
     ASSERT(m_buffer->length() >= m_length);
     return result;
 }
 
-inline UChar* StringBuilder::appendUninitializedUpconvert(unsigned length)
-{
-    unsigned requiredLength = length + m_length;
-    if (requiredLength < length)
-        CRASH();
-
-    if (m_buffer) {
-        // If the buffer is valid it must be at least as long as the current builder contents!
-        ASSERT(m_buffer->length() >= m_length);
-        allocateBufferUpconvert(m_buffer->characters8(), expandedCapacity(capacity(), requiredLength));
-    } else {
-        ASSERT(m_string.length() == m_length);
-        allocateBufferUpconvert(m_string.isNull() ? nullptr : m_string.characters8(), expandedCapacity(capacity(), requiredLength));
-    }
-
-    auto* result = m_bufferCharacters16 + m_length;
-    m_length = requiredLength;
-    return result;
-}
-
 void StringBuilder::append(const UChar* characters, unsigned length)
 {
     if (!length)
@@ -283,16 +254,32 @@ void StringBuilder::append(const UChar* characters, unsigned length)
     ASSERT(characters);
 
     if (m_is8Bit) {
-        if (length == 1 && !(*characters & ~0xFF)) {
+        if (length == 1 && !(*characters & ~0xff)) {
             // Append as 8 bit character
             LChar lChar = static_cast<LChar>(*characters);
             append(&lChar, 1);
             return;
         }
-        memcpy(appendUninitializedUpconvert(length), characters, static_cast<size_t>(length) * sizeof(UChar));
+
+        // Calculate the new size of the builder after appending.
+        unsigned requiredLength = length + m_length;
+        if (requiredLength < length)
+            CRASH();
+        
+        if (m_buffer) {
+            // If the buffer is valid it must be at least as long as the current builder contents!
+            ASSERT(m_buffer->length() >= m_length);
+            
+            allocateBufferUpConvert(m_buffer->characters8(), expandedCapacity(capacity(), requiredLength));
+        } else {
+            ASSERT(m_string.length() == m_length);
+            allocateBufferUpConvert(m_string.isNull() ? 0 : m_string.characters8(), expandedCapacity(capacity(), requiredLength));
+        }
+
+        memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar));
+        m_length = requiredLength;
     } else
         memcpy(appendUninitialized<UChar>(length), characters, static_cast<size_t>(length) * sizeof(UChar));
-
     ASSERT(m_buffer->length() >= m_length);
 }
 
@@ -303,22 +290,19 @@ void StringBuilder::append(const LChar* characters, unsigned length)
     ASSERT(characters);
 
     if (m_is8Bit) {
-        auto* destination = appendUninitialized<LChar>(length);
-        // FIXME: How did we determine a threshold of 8 here was the right one?
-        // Also, this kind of optimization could be useful anywhere else we have a
-        // performance-sensitive code path that calls memcpy.
+        LChar* dest = appendUninitialized<LChar>(length);
         if (length > 8)
-            memcpy(destination, characters, length);
+            memcpy(dest, characters, static_cast<size_t>(length) * sizeof(LChar));
         else {
             const LChar* end = characters + length;
             while (characters < end)
-                *destination++ = *characters++;
+                *(dest++) = *(characters++);
         }
     } else {
-        auto* destination = appendUninitialized<UChar>(length);
+        UChar* dest = appendUninitialized<UChar>(length);
         const LChar* end = characters + length;
         while (characters < end)
-            *destination++ = *characters++;
+            *(dest++) = *(characters++);
     }
 }
 
@@ -401,58 +385,17 @@ void StringBuilder::shrinkToFit()
     }
 }
 
-template<typename LengthType, typename CharacterType> static LengthType quotedJSONStringLength(const CharacterType* input, unsigned length)
+template <typename OutputCharacterType, typename InputCharacterType>
+static void appendQuotedJSONStringInternal(OutputCharacterType*& output, const InputCharacterType* input, unsigned length)
 {
-    LengthType quotedLength = 2;
-    for (unsigned i = 0; i < length; ++i) {
-        auto character = input[i];
-        if (LIKELY(character > 0x1F)) {
-            switch (character) {
-            case '"':
-            case '\\':
-                quotedLength += 2;
-                break;
-            default:
-                ++quotedLength;
-                break;
-            }
-        } else {
-            switch (character) {
-            case '\t':
-            case '\r':
-            case '\n':
-            case '\f':
-            case '\b':
-                quotedLength += 2;
-                break;
-            default:
-                quotedLength += 6;
-            }
-        }
-    }
-    return quotedLength;
-}
-
-template<typename CharacterType> static inline unsigned quotedJSONStringLength(const CharacterType* input, unsigned length)
-{
-    constexpr auto maxSafeLength = (std::numeric_limits<unsigned>::max() - 2) / 6;
-    if (length <= maxSafeLength)
-        return quotedJSONStringLength<unsigned>(input, length);
-    return quotedJSONStringLength<Checked<unsigned>>(input, length).unsafeGet();
-}
-
-template<typename OutputCharacterType, typename InputCharacterType> static inline void appendQuotedJSONStringInternal(OutputCharacterType* output, const InputCharacterType* input, unsigned length)
-{
-    *output++ = '"';
-    for (unsigned i = 0; i < length; ++i) {
-        auto character = input[i];
-        if (LIKELY(character > 0x1F)) {
-            if (UNLIKELY(character == '"' || character == '\\'))
+    for (const InputCharacterType* end = input + length; input != end; ++input) {
+        if (LIKELY(*input > 0x1F)) {
+            if (*input == '"' || *input == '\\')
                 *output++ = '\\';
-            *output++ = character;
+            *output++ = *input;
             continue;
         }
-        switch (character) {
+        switch (*input) {
         case '\t':
             *output++ = '\\';
             *output++ = 't';
@@ -474,35 +417,56 @@ template<typename OutputCharacterType, typename InputCharacterType> static inlin
             *output++ = 'b';
             break;
         default:
-            ASSERT(!(character & ~0xFF));
+            ASSERT((*input & 0xFF00) == 0);
+            static const char hexDigits[] = "0123456789abcdef";
             *output++ = '\\';
             *output++ = 'u';
             *output++ = '0';
             *output++ = '0';
-            *output++ = upperNibbleToLowercaseASCIIHexDigit(character);
-            *output++ = lowerNibbleToLowercaseASCIIHexDigit(character);
+            *output++ = static_cast<LChar>(hexDigits[(*input >> 4) & 0xF]);
+            *output++ = static_cast<LChar>(hexDigits[*input & 0xF]);
             break;
         }
     }
-    *output = '"';
 }
 
-void StringBuilder::appendQuotedJSONString(StringView string)
+void StringBuilder::appendQuotedJSONString(const String& string)
 {
-    unsigned length = string.length();
-    if (string.is8Bit()) {
-        auto* characters = string.characters8();
-        if (m_is8Bit)
-            appendQuotedJSONStringInternal(appendUninitialized<LChar>(quotedJSONStringLength(characters, length)), characters, length);
-        else
-            appendQuotedJSONStringInternal(appendUninitialized<UChar>(quotedJSONStringLength(characters, length)), characters, length);
+    // Make sure we have enough buffer space to append this string without having
+    // to worry about reallocating in the middle.
+    // The 2 is for the '"' quotes on each end.
+    // The 6 is for characters that need to be \uNNNN encoded.
+    Checked<unsigned> stringLength = string.length();
+    Checked<unsigned> maximumCapacityRequired = length();
+    maximumCapacityRequired += 2 + stringLength * 6;
+    unsigned allocationSize = maximumCapacityRequired.unsafeGet();
+    // This max() is here to allow us to allocate sizes between the range [2^31, 2^32 - 2] because roundUpToPowerOfTwo(1<<31 + some int smaller than 1<<31) == 0.
+    allocationSize = std::max(allocationSize, roundUpToPowerOfTwo(allocationSize));
+
+    if (is8Bit() && !string.is8Bit())
+        allocateBufferUpConvert(m_bufferCharacters8, allocationSize);
+    else
+        reserveCapacity(allocationSize);
+    ASSERT(m_buffer->length() >= allocationSize);
+
+    if (is8Bit()) {
+        ASSERT(string.is8Bit());
+        LChar* output = m_bufferCharacters8 + m_length;
+        *output++ = '"';
+        appendQuotedJSONStringInternal(output, string.characters8(), string.length());
+        *output++ = '"';
+        m_length = output - m_bufferCharacters8;
     } else {
-        auto* characters = string.characters16();
-        if (m_is8Bit)
-            appendQuotedJSONStringInternal(appendUninitializedUpconvert(quotedJSONStringLength(characters, length)), characters, length);
+        UChar* output = m_bufferCharacters16 + m_length;
+        *output++ = '"';
+        if (string.is8Bit())
+            appendQuotedJSONStringInternal(output, string.characters8(), string.length());
         else
-            appendQuotedJSONStringInternal(appendUninitialized<UChar>(quotedJSONStringLength(characters, length)), characters, length);
+            appendQuotedJSONStringInternal(output, string.characters16(), string.length());
+        *output++ = '"';
+        m_length = output - m_bufferCharacters16;
     }
+    ASSERT(m_buffer->length() >= m_length);
 }
 
 } // namespace WTF
index 1870649..a8dada7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2009-2010, 2012-2013, 2016 Apple Inc. All rights reserved.
  * Copyright (C) 2012 Google Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  */
 
-#pragma once
+#ifndef StringBuilder_h
+#define StringBuilder_h
 
+#include <wtf/text/AtomicString.h>
 #include <wtf/text/StringView.h>
+#include <wtf/text/WTFString.h>
 
 namespace WTF {
 
 class StringBuilder {
-    // Disallow copying since it's expensive and we don't want anyone to do it by accident.
+    // Disallow copying since it's expensive and we don't want code to do it by accident.
     WTF_MAKE_NONCOPYABLE(StringBuilder);
 
 public:
-    StringBuilder() = default;
+    StringBuilder()
+        : m_length(0)
+        , m_is8Bit(true)
+        , m_bufferCharacters8(0)
+    {
+    }
 
     WTF_EXPORT_PRIVATE void append(const UChar*, unsigned);
     WTF_EXPORT_PRIVATE void append(const LChar*, unsigned);
 
     ALWAYS_INLINE void append(const char* characters, unsigned length) { append(reinterpret_cast<const LChar*>(characters), length); }
 
-    void append(const AtomicString& atomicString) { append(atomicString.string()); }
+    void append(const AtomicString& atomicString)
+    {
+        append(atomicString.string());
+    }
 
     void append(const String& string)
     {
-        unsigned length = string.length();
-        if (!length)
+        if (!string.length())
             return;
 
-        // If we're appending to an empty string, and there is not a buffer
-        // (reserveCapacity has not been called) then just retain the string.
+        // If we're appending to an empty string, and there is not a buffer (reserveCapacity has not been called)
+        // then just retain the string.
         if (!m_length && !m_buffer) {
             m_string = string;
-            m_length = length;
-            m_is8Bit = string.is8Bit();
+            m_length = string.length();
+            m_is8Bit = m_string.is8Bit();
             return;
         }
 
         if (string.is8Bit())
-            append(string.characters8(), length);
+            append(string.characters8(), string.length());
         else
-            append(string.characters16(), length);
+            append(string.characters16(), string.length());
     }
 
     void append(const StringBuilder& other)
@@ -70,12 +80,11 @@ public:
         if (!other.m_length)
             return;
 
-        // If we're appending to an empty string, and there is not a buffer
-        // (reserveCapacity has not been called) then just retain the string.
+        // If we're appending to an empty string, and there is not a buffer (reserveCapacity has not been called)
+        // then just retain the string.
         if (!m_length && !m_buffer && !other.m_string.isNull()) {
             m_string = other.m_string;
             m_length = other.m_length;
-            m_is8Bit = other.m_is8Bit;
             return;
         }
 
@@ -96,27 +105,17 @@ public:
 #if USE(CF)
     WTF_EXPORT_PRIVATE void append(CFStringRef);
 #endif
-
 #if USE(CF) && defined(__OBJC__)
     void append(NSString *string) { append((__bridge CFStringRef)string); }
 #endif
     
     void append(const String& string, unsigned offset, unsigned length)
     {
-        ASSERT(offset <= string.length());
-        ASSERT(offset + length <= string.length());
-
-        if (!length)
+        if (!string.length())
             return;
 
-        // If we're appending to an empty string, and there is not a buffer
-        // (reserveCapacity has not been called) then just retain the string.
-        if (!offset && !m_length && !m_buffer && length == string.length()) {
-            m_string = string;
-            m_length = length;
-            m_is8Bit = string.is8Bit();
+        if ((offset + length) > string.length())
             return;
-        }
 
         if (string.is8Bit())
             append(string.characters8() + offset, length);
@@ -130,33 +129,37 @@ public:
             append(characters, strlen(characters));
     }
 
-    void append(UChar character)
+    void append(UChar c)
     {
         if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) {
             if (!m_is8Bit) {
-                m_bufferCharacters16[m_length++] = character;
+                m_bufferCharacters16[m_length++] = c;
                 return;
             }
-            if (!(character & ~0xFF)) {
-                m_bufferCharacters8[m_length++] = static_cast<LChar>(character);
+
+            if (!(c & ~0xff)) {
+                m_bufferCharacters8[m_length++] = static_cast<LChar>(c);
                 return;
             }
         }
-        append(&character, 1);
+        append(&c, 1);
     }
 
-    void append(LChar character)
+    void append(LChar c)
     {
         if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) {
             if (m_is8Bit)
-                m_bufferCharacters8[m_length++] = character;
+                m_bufferCharacters8[m_length++] = c;
             else
-                m_bufferCharacters16[m_length++] = character;
+                m_bufferCharacters16[m_length++] = c;
         } else
-            append(&character, 1);
+            append(&c, 1);
     }
 
-    void append(char character) { append(static_cast<LChar>(character)); }
+    void append(char c)
+    {
+        append(static_cast<LChar>(c));
+    }
 
     void append(UChar32 c)
     {
@@ -168,9 +171,10 @@ public:
         append(U16_TRAIL(c));
     }
 
-    WTF_EXPORT_PRIVATE void appendQuotedJSONString(StringView);
+    WTF_EXPORT_PRIVATE void appendQuotedJSONString(const String&);
 
-    template<unsigned charactersCount> ALWAYS_INLINE void appendLiteral(const char (&characters)[charactersCount]) { append(characters, charactersCount - 1); }
+    template<unsigned charactersCount>
+    ALWAYS_INLINE void appendLiteral(const char (&characters)[charactersCount]) { append(characters, charactersCount - 1); }
 
     WTF_EXPORT_PRIVATE void appendNumber(int);
     WTF_EXPORT_PRIVATE void appendNumber(unsigned int);
@@ -216,15 +220,24 @@ public:
         return AtomicString(m_buffer.get(), 0, m_length);
     }
 
-    unsigned length() const { return m_length; }
+    unsigned length() const
+    {
+        return m_length;
+    }
+
     bool isEmpty() const { return !m_length; }
 
     WTF_EXPORT_PRIVATE void reserveCapacity(unsigned newCapacity);
 
-    unsigned capacity() const { return m_buffer ? m_buffer->length() : m_length; }
+    unsigned capacity() const
+    {
+        return m_buffer ? m_buffer->length() : m_length;
+    }
 
     WTF_EXPORT_PRIVATE void resize(unsigned newSize);
+
     WTF_EXPORT_PRIVATE bool canShrink() const;
+
     WTF_EXPORT_PRIVATE void shrinkToFit();
 
     UChar operator[](unsigned i) const
@@ -281,36 +294,43 @@ public:
 private:
     void allocateBuffer(const LChar* currentCharacters, unsigned requiredLength);
     void allocateBuffer(const UChar* currentCharacters, unsigned requiredLength);
-    void allocateBufferUpconvert(const LChar* currentCharacters, unsigned requiredLength);
-    template<typename CharacterType> void reallocateBuffer(unsigned requiredLength);
-    UChar* appendUninitializedUpconvert(unsigned length);
-    template<typename CharacterType> CharacterType* appendUninitialized(unsigned length);
-    template<typename CharacterType> CharacterType* appendUninitializedSlow(unsigned length);
-    template<typename CharacterType> CharacterType* bufferCharacters();
+    void allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength);
+    template <typename CharType>
+    void reallocateBuffer(unsigned requiredLength);
+    template <typename CharType>
+    ALWAYS_INLINE CharType* appendUninitialized(unsigned length);
+    template <typename CharType>
+    CharType* appendUninitializedSlow(unsigned length);
+    template <typename CharType>
+    ALWAYS_INLINE CharType * getBufferCharacters();
     WTF_EXPORT_PRIVATE void reifyString() const;
 
-    unsigned m_length { 0 };
+    unsigned m_length;
     mutable String m_string;
     RefPtr<StringImpl> m_buffer;
-    bool m_is8Bit { true };
+    bool m_is8Bit;
     union {
-        LChar* m_bufferCharacters8 { nullptr };
+        LChar* m_bufferCharacters8;
         UChar* m_bufferCharacters16;
     };
 };
 
-template<typename StringType> bool equal(const StringBuilder&, const StringType&);
-bool equal(const StringBuilder&, const String&); // Only needed because is8Bit dereferences nullptr when the string is null.
-template<typename CharacterType> bool equal(const StringBuilder&, const CharacterType*, unsigned length);
+template <>
+ALWAYS_INLINE LChar* StringBuilder::getBufferCharacters<LChar>()
+{
+    ASSERT(m_is8Bit);
+    return m_bufferCharacters8;
+}
 
-bool operator==(const StringBuilder&, const StringBuilder&);
-bool operator!=(const StringBuilder&, const StringBuilder&);
-bool operator==(const StringBuilder&, const String&);
-bool operator!=(const StringBuilder&, const String&);
-bool operator==(const String&, const StringBuilder&);
-bool operator!=(const String&, const StringBuilder&);
+template <>
+ALWAYS_INLINE UChar* StringBuilder::getBufferCharacters<UChar>()
+{
+    ASSERT(!m_is8Bit);
+    return m_bufferCharacters16;
+}    
 
-template<typename CharacterType> inline bool equal(const StringBuilder& s, const CharacterType* buffer, unsigned length)
+template <typename CharType>
+bool equal(const StringBuilder& s, const CharType* buffer, unsigned length)
 {
     if (s.length() != length)
         return false;
@@ -321,14 +341,24 @@ template<typename CharacterType> inline bool equal(const StringBuilder& s, const
     return equal(s.characters16(), buffer, length);
 }
 
-template<typename StringType> inline bool equal(const StringBuilder& a, const StringType& b)
+template <typename StringType>
+bool equal(const StringBuilder& a, const StringType& b)
 {
-    return equalCommon(a, b);
-}
+    if (a.length() != b.length())
+        return false;
 
-inline bool equal(const StringBuilder& a, const String& b)
-{
-    return !b.isNull() && equalCommon(a, b);
+    if (!a.length())
+        return true;
+
+    if (a.is8Bit()) {
+        if (b.is8Bit())
+            return equal(a.characters8(), b.characters8(), a.length());
+        return equal(a.characters8(), b.characters16(), a.length());
+    }
+
+    if (b.is8Bit())
+        return equal(a.characters16(), b.characters8(), a.length());
+    return equal(a.characters16(), b.characters16(), a.length());
 }
 
 inline bool operator==(const StringBuilder& a, const StringBuilder& b) { return equal(a, b); }
@@ -341,3 +371,5 @@ inline bool operator!=(const String& a, const StringBuilder& b) { return !equal(
 } // namespace WTF
 
 using WTF::StringBuilder;
+
+#endif // StringBuilder_h
index a01e118..af24e45 100644 (file)
@@ -1,3 +1,23 @@
+2016-11-29  Commit Queue  <commit-queue@webkit.org>
+
+        Unreviewed, rolling out r209058 and r209074.
+        https://bugs.webkit.org/show_bug.cgi?id=165188
+
+        These changes caused API test StringBuilderTest.Equal to crash
+        and/or fail. (Requested by ryanhaddad on #webkit).
+
+        Reverted changesets:
+
+        "Streamline and speed up tokenizer and segmented string
+        classes"
+        https://bugs.webkit.org/show_bug.cgi?id=165003
+        http://trac.webkit.org/changeset/209058
+
+        "REGRESSION (r209058): API test StringBuilderTest.Equal
+        crashing"
+        https://bugs.webkit.org/show_bug.cgi?id=165142
+        http://trac.webkit.org/changeset/209074
+
 2016-11-29  Nan Wang  <n_wang@apple.com>
 
         AX: ARIA tree & treeitem roles & aria-expanded state not spoken to VoiceOver iOS 10
index e916b9c..476660f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2009, 2016 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
 #include "config.h"
 #include "JSHTMLDocument.h"
 
+#include "Frame.h"
+#include "HTMLCollection.h"
+#include "HTMLDocument.h"
+#include "HTMLElement.h"
 #include "HTMLIFrameElement.h"
+#include "HTMLNames.h"
+#include "JSDOMWindow.h"
 #include "JSDOMWindowCustom.h"
+#include "JSDOMWindowShell.h"
+#include "JSDocumentCustom.h"
 #include "JSHTMLCollection.h"
+#include "JSMainThreadExecState.h"
 #include "SegmentedString.h"
+#include "DocumentParser.h"
+#include <interpreter/StackVisitor.h>
+#include <runtime/Error.h>
+#include <runtime/JSCell.h>
+#include <wtf/unicode/CharacterNames.h>
 
 using namespace JSC;
 
@@ -40,8 +54,10 @@ using namespace HTMLNames;
 JSValue toJSNewlyCreated(ExecState* state, JSDOMGlobalObject* globalObject, Ref<HTMLDocument>&& passedDocument)
 {
     auto& document = passedDocument.get();
-    auto* wrapper = createWrapper<HTMLDocument>(globalObject, WTFMove(passedDocument));
+    JSObject* wrapper = createWrapper<HTMLDocument>(globalObject, WTFMove(passedDocument));
+
     reportMemoryForDocumentIfFrameless(*state, document);
+
     return wrapper;
 }
 
@@ -52,51 +68,52 @@ JSValue toJS(ExecState* state, JSDOMGlobalObject* globalObject, HTMLDocument& do
     return toJSNewlyCreated(state, globalObject, Ref<HTMLDocument>(document));
 }
 
-bool JSHTMLDocument::getOwnPropertySlot(JSObject* object, ExecState* state, PropertyName propertyName, PropertySlot& slot)
+bool JSHTMLDocument::getOwnPropertySlot(JSObject* object, ExecState* exec, PropertyName propertyName, PropertySlot& slot)
 {
-    auto& thisObject = *jsCast<JSHTMLDocument*>(object);
-    ASSERT_GC_OBJECT_INHERITS((&thisObject), info());
+    JSHTMLDocument* thisObject = jsCast<JSHTMLDocument*>(object);
+    ASSERT_GC_OBJECT_INHERITS(thisObject, info());
 
     if (propertyName == "open") {
-        if (Base::getOwnPropertySlot(&thisObject, state, propertyName, slot))
+        if (Base::getOwnPropertySlot(thisObject, exec, propertyName, slot))
             return true;
-        slot.setCustom(&thisObject, ReadOnly | DontDelete | DontEnum, nonCachingStaticFunctionGetter<jsHTMLDocumentPrototypeFunctionOpen, 2>);
+
+        slot.setCustom(thisObject, ReadOnly | DontDelete | DontEnum, nonCachingStaticFunctionGetter<jsHTMLDocumentPrototypeFunctionOpen, 2>);
         return true;
     }
 
     JSValue value;
-    if (thisObject.nameGetter(state, propertyName, value)) {
-        slot.setValue(&thisObject, ReadOnly | DontDelete | DontEnum, value);
+    if (thisObject->nameGetter(exec, propertyName, value)) {
+        slot.setValue(thisObject, ReadOnly | DontDelete | DontEnum, value);
         return true;
     }
 
-    return Base::getOwnPropertySlot(&thisObject, state, propertyName, slot);
+    return Base::getOwnPropertySlot(thisObject, exec, propertyName, slot);
 }
 
-bool JSHTMLDocument::nameGetter(ExecState* state, PropertyName propertyName, JSValue& value)
+bool JSHTMLDocument::nameGetter(ExecState* exec, PropertyName propertyName, JSValue& value)
 {
     auto& document = wrapped();
 
-    auto* atomicPropertyName = propertyName.publicName();
+    AtomicStringImpl* atomicPropertyName = propertyName.publicName();
     if (!atomicPropertyName || !document.hasDocumentNamedItem(*atomicPropertyName))
         return false;
 
     if (UNLIKELY(document.documentNamedItemContainsMultipleElements(*atomicPropertyName))) {
-        auto collection = document.documentNamedItems(atomicPropertyName);
+        Ref<HTMLCollection> collection = document.documentNamedItems(atomicPropertyName);
         ASSERT(collection->length() > 1);
-        value = toJS(state, globalObject(), collection);
+        value = toJS(exec, globalObject(), collection);
         return true;
     }
 
-    auto& element = *document.documentNamedItem(*atomicPropertyName);
+    Element& element = *document.documentNamedItem(*atomicPropertyName);
     if (UNLIKELY(is<HTMLIFrameElement>(element))) {
-        if (auto* frame = downcast<HTMLIFrameElement>(element).contentFrame()) {
-            value = toJS(state, frame);
+        if (Frame* frame = downcast<HTMLIFrameElement>(element).contentFrame()) {
+            value = toJS(exec, frame);
             return true;
         }
     }
 
-    value = toJS(state, globalObject(), element);
+    value = toJS(exec, globalObject(), element);
     return true;
 }
 
@@ -105,8 +122,9 @@ bool JSHTMLDocument::nameGetter(ExecState* state, PropertyName propertyName, JSV
 JSValue JSHTMLDocument::all(ExecState& state) const
 {
     // If "all" has been overwritten, return the overwritten value
-    if (auto overwrittenValue = getDirect(state.vm(), Identifier::fromString(&state, "all")))
-        return overwrittenValue;
+    JSValue v = getDirect(state.vm(), Identifier::fromString(&state, "all"));
+    if (v)
+        return v;
 
     return toJS(&state, globalObject(), wrapped().all());
 }
@@ -117,14 +135,15 @@ void JSHTMLDocument::setAll(ExecState& state, JSValue value)
     putDirect(state.vm(), Identifier::fromString(&state, "all"), value);
 }
 
-static inline Document* findCallingDocument(ExecState& state)
+static Document* findCallingDocument(ExecState& state)
 {
     CallerFunctor functor;
     state.iterate(functor);
-    auto* callerFrame = functor.callerFrame();
+    CallFrame* callerFrame = functor.callerFrame();
     if (!callerFrame)
         return nullptr;
-    return asJSDOMWindow(callerFrame->lexicalGlobalObject())->wrapped().document();
+
+    return asJSDOMWindow(functor.callerFrame()->lexicalGlobalObject())->wrapped().document();
 }
 
 // Custom functions
@@ -136,11 +155,12 @@ JSValue JSHTMLDocument::open(ExecState& state)
 
     // For compatibility with other browsers, pass open calls with more than 2 parameters to the window.
     if (state.argumentCount() > 2) {
-        if (auto* frame = wrapped().frame()) {
-            if (auto* wrapper = toJSDOMWindowShell(frame, currentWorld(&state))) {
-                auto function = wrapper->get(&state, Identifier::fromString(&state, "open"));
+        if (Frame* frame = wrapped().frame()) {
+            JSDOMWindowShell* wrapper = toJSDOMWindowShell(frame, currentWorld(&state));
+            if (wrapper) {
+                JSValue function = wrapper->get(&state, Identifier::fromString(&state, "open"));
                 CallData callData;
-                auto callType = ::getCallData(function, callData);
+                CallType callType = ::getCallData(function, callData);
                 if (callType == CallType::None)
                     return throwTypeError(&state, scope);
                 return JSC::call(&state, function, callType, callData, wrapper, ArgList(&state));
@@ -149,41 +169,53 @@ JSValue JSHTMLDocument::open(ExecState& state)
         return jsUndefined();
     }
 
-    // Calling document.open clobbers the security context of the document and aliases it with the active security context.
-    // FIXME: Is it correct that this does not use findCallingDocument as the write function below does?
-    wrapped().open(asJSDOMWindow(state.lexicalGlobalObject())->wrapped().document());
-    // FIXME: Why do we return the document instead of returning undefined?
+    // document.open clobbers the security context of the document and
+    // aliases it with the active security context.
+    Document* activeDocument = asJSDOMWindow(state.lexicalGlobalObject())->wrapped().document();
+
+    // In the case of two parameters or fewer, do a normal document open.
+    wrapped().open(activeDocument);
     return this;
 }
 
 enum NewlineRequirement { DoNotAddNewline, DoAddNewline };
 
-static inline JSValue documentWrite(ExecState& state, JSHTMLDocument& document, NewlineRequirement addNewline)
+static inline void documentWrite(ExecState& state, JSHTMLDocument* thisDocument, NewlineRequirement addNewline)
 {
-    VM& vm = state.vm();
-    auto scope = DECLARE_THROW_SCOPE(vm);
-
-    SegmentedString segmentedString;
-    size_t argumentCount = state.argumentCount();
-    for (size_t i = 0; i < argumentCount; ++i) {
-        segmentedString.append(state.uncheckedArgument(i).toWTFString(&state));
-        RETURN_IF_EXCEPTION(scope, { });
+    HTMLDocument* document = &thisDocument->wrapped();
+    // DOM only specifies single string argument, but browsers allow multiple or no arguments.
+
+    size_t size = state.argumentCount();
+
+    String firstString = state.argument(0).toString(&state)->value(&state);
+    SegmentedString segmentedString = firstString;
+    if (size != 1) {
+        if (!size)
+            segmentedString.clear();
+        else {
+            for (size_t i = 1; i < size; ++i) {
+                String subsequentString = state.uncheckedArgument(i).toString(&state)->value(&state);
+                segmentedString.append(SegmentedString(subsequentString));
+            }
+        }
     }
     if (addNewline)
-        segmentedString.append(String { "\n" });
+        segmentedString.append(SegmentedString(String(&newlineCharacter, 1)));
 
-    document.wrapped().write(WTFMove(segmentedString), findCallingDocument(state));
-    return jsUndefined();
+    Document* activeDocument = findCallingDocument(state);
+    document->write(segmentedString, activeDocument);
 }
 
 JSValue JSHTMLDocument::write(ExecState& state)
 {
-    return documentWrite(state, *this, DoNotAddNewline);
+    documentWrite(state, this, DoNotAddNewline);
+    return jsUndefined();
 }
 
 JSValue JSHTMLDocument::writeln(ExecState& state)
 {
-    return documentWrite(state, *this, DoAddNewline);
+    documentWrite(state, this, DoAddNewline);
+    return jsUndefined();
 }
 
 } // namespace WebCore
index de2bb94..70a6e7b 100644 (file)
@@ -35,7 +35,6 @@
 #include "CSSParserTokenRange.h"
 #include "CSSTokenizerInputStream.h"
 #include "HTMLParserIdioms.h"
-#include <wtf/text/StringBuilder.h>
 #include <wtf/unicode/CharacterNames.h>
 
 namespace WebCore {
index 91e92dc..120cf8e 100644 (file)
@@ -30,6 +30,7 @@
 #pragma once
 
 #include "CSSParserToken.h"
+#include "InputStreamPreprocessor.h"
 #include <climits>
 #include <wtf/text/StringView.h>
 #include <wtf/text/WTFString.h>
index 93f7a1d..1bc371e 100644 (file)
 #pragma once
 
 #include <wtf/text/StringView.h>
+#include <wtf/text/WTFString.h>
 
 namespace WebCore {
 
-constexpr LChar kEndOfFileMarker = 0;
-
 class CSSTokenizerInputStream {
     WTF_MAKE_NONCOPYABLE(CSSTokenizerInputStream);
     WTF_MAKE_FAST_ALLOCATED;
index fee5792..f545ee9 100644 (file)
@@ -2791,7 +2791,7 @@ Seconds Document::timeSinceDocumentCreation() const
     return MonotonicTime::now() - m_documentCreationTime;
 }
 
-void Document::write(SegmentedString&& text, Document* ownerDocument)
+void Document::write(const SegmentedString& text, Document* ownerDocument)
 {
     NestingLevelIncrementer nestingLevelIncrementer(m_writeRecursionDepth);
 
@@ -2799,7 +2799,7 @@ void Document::write(SegmentedString&& text, Document* ownerDocument)
     m_writeRecursionIsTooDeep = (m_writeRecursionDepth > cMaxWriteRecursionDepth) || m_writeRecursionIsTooDeep;
 
     if (m_writeRecursionIsTooDeep)
-        return;
+       return;
 
     bool hasInsertionPoint = m_parser && m_parser->hasInsertionPoint();
     if (!hasInsertionPoint && (m_ignoreOpensDuringUnloadCount || m_ignoreDestructiveWriteCount))
@@ -2809,19 +2809,18 @@ void Document::write(SegmentedString&& text, Document* ownerDocument)
         open(ownerDocument);
 
     ASSERT(m_parser);
-    m_parser->insert(WTFMove(text));
+    m_parser->insert(text);
 }
 
 void Document::write(const String& text, Document* ownerDocument)
 {
-    write(SegmentedString { text }, ownerDocument);
+    write(SegmentedString(text), ownerDocument);
 }
 
 void Document::writeln(const String& text, Document* ownerDocument)
 {
-    SegmentedString textWithNewline { text };
-    textWithNewline.append(String { "\n" });
-    write(WTFMove(textWithNewline), ownerDocument);
+    write(text, ownerDocument);
+    write("\n", ownerDocument);
 }
 
 std::chrono::milliseconds Document::minimumTimerInterval() const
index 50e2b83..a425583 100644 (file)
@@ -602,7 +602,7 @@ public:
 
     void cancelParsing();
 
-    void write(SegmentedString&& text, Document* ownerDocument = nullptr);
+    void write(const SegmentedString& text, Document* ownerDocument = nullptr);
     WEBCORE_EXPORT void write(const String& text, Document* ownerDocument = nullptr);
     WEBCORE_EXPORT void writeln(const String& text, Document* ownerDocument = nullptr);
 
index 0828c36..8f30a61 100644 (file)
@@ -43,7 +43,7 @@ public:
     virtual bool hasInsertionPoint() { return true; }
 
     // insert is used by document.write.
-    virtual void insert(SegmentedString&&) = 0;
+    virtual void insert(const SegmentedString&) = 0;
 
     // appendBytes and flush are used by DocumentWriter (the loader).
     virtual void appendBytes(DocumentWriter&, const char* bytes, size_t length) = 0;
index 094ec87..77266e2 100644 (file)
@@ -49,7 +49,7 @@ private:
         appendBytes(writer, 0, 0);
     }
 
-    void insert(SegmentedString&&) override
+    void insert(const SegmentedString&) override
     {
         // <https://bugs.webkit.org/show_bug.cgi?id=25397>: JS code can always call document.write, we need to handle it.
         ASSERT_NOT_REACHED();
index de81d42..cc57281 100644 (file)
@@ -344,6 +344,8 @@ void FTPDirectoryDocumentParser::createBasicDocument()
 
 void FTPDirectoryDocumentParser::append(RefPtr<StringImpl>&& inputSource)
 {
+    String source(WTFMove(inputSource));
+
     // Make sure we have the table element to append to by loading the template set in the pref, or
     // creating a very basic document with the appropriate table
     if (!m_tableElement) {
@@ -355,9 +357,9 @@ void FTPDirectoryDocumentParser::append(RefPtr<StringImpl>&& inputSource)
     bool foundNewLine = false;
 
     m_dest = m_buffer;
-    SegmentedString string { String { WTFMove(inputSource) } };
-    while (!string.isEmpty()) {
-        UChar c = string.currentCharacter();
+    SegmentedString str = source;
+    while (!str.isEmpty()) {
+        UChar c = str.currentChar();
 
         if (c == '\r') {
             *m_dest++ = '\n';
@@ -374,7 +376,7 @@ void FTPDirectoryDocumentParser::append(RefPtr<StringImpl>&& inputSource)
             m_skipLF = false;
         }
 
-        string.advance();
+        str.advance();
 
         // Maybe enlarge the buffer
         checkBuffer();
index 06a41e9..27111fb 100644 (file)
@@ -328,7 +328,7 @@ bool HTMLDocumentParser::hasInsertionPoint()
     return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile());
 }
 
-void HTMLDocumentParser::insert(SegmentedString&& source)
+void HTMLDocumentParser::insert(const SegmentedString& source)
 {
     if (isStopped())
         return;
@@ -337,8 +337,9 @@ void HTMLDocumentParser::insert(SegmentedString&& source)
     // but we need to ensure it isn't deleted yet.
     Ref<HTMLDocumentParser> protectedThis(*this);
 
-    source.setExcludeLineNumbers();
-    m_input.insertAtCurrentInsertionPoint(WTFMove(source));
+    SegmentedString excludedLineNumberSource(source);
+    excludedLineNumberSource.setExcludeLineNumbers();
+    m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
     pumpTokenizerIfPossible(ForceSynchronous);
 
     if (isWaitingForScripts()) {
@@ -362,7 +363,7 @@ void HTMLDocumentParser::append(RefPtr<StringImpl>&& inputSource)
     // but we need to ensure it isn't deleted yet.
     Ref<HTMLDocumentParser> protectedThis(*this);
 
-    String source { WTFMove(inputSource) };
+    String source(WTFMove(inputSource));
 
     if (m_preloadScanner) {
         if (m_input.current().isEmpty() && !isWaitingForScripts()) {
index e97ba56..8ad0081 100644 (file)
@@ -65,7 +65,7 @@ public:
 protected:
     explicit HTMLDocumentParser(HTMLDocument&);
 
-    void insert(SegmentedString&&) final;
+    void insert(const SegmentedString&) final;
     void append(RefPtr<StringImpl>&&) override;
     void finish() override;
 
index 98503d9..dbffadf 100644 (file)
@@ -61,12 +61,12 @@ public:
         StringBuilder consumedCharacters;
         HTMLEntitySearch entitySearch;
         while (!source.isEmpty()) {
-            cc = source.currentCharacter();
+            cc = source.currentChar();
             entitySearch.advance(cc);
             if (!entitySearch.isEntityPrefix())
                 break;
             consumedCharacters.append(cc);
-            source.advancePastNonNewline();
+            source.advance();
         }
         notEnoughCharacters = source.isEmpty();
         if (notEnoughCharacters) {
@@ -88,13 +88,13 @@ public:
             const int length = entitySearch.mostRecentMatch()->length;
             const LChar* reference = entitySearch.mostRecentMatch()->entity;
             for (int i = 0; i < length; ++i) {
-                cc = source.currentCharacter();
+                cc = source.currentChar();
                 ASSERT_UNUSED(reference, cc == *reference++);
                 consumedCharacters.append(cc);
-                source.advancePastNonNewline();
+                source.advance();
                 ASSERT(!source.isEmpty());
             }
-            cc = source.currentCharacter();
+            cc = source.currentChar();
         }
         if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
             || !additionalAllowedCharacter
index f86e8f8..e21189f 100644 (file)
@@ -25,6 +25,7 @@
 
 #pragma once
 
+#include "InputStreamPreprocessor.h"
 #include "SegmentedString.h"
 #include <wtf/text/TextPosition.h>
 
@@ -55,14 +56,14 @@ public:
     {
     }
 
-    void appendToEnd(SegmentedString&& string)
+    void appendToEnd(const SegmentedString& string)
     {
-        m_last->append(WTFMove(string));
+        m_last->append(string);
     }
 
-    void insertAtCurrentInsertionPoint(SegmentedString&& string)
+    void insertAtCurrentInsertionPoint(const SegmentedString& string)
     {
-        m_first.append(WTFMove(string));
+        m_first.append(string);
     }
 
     bool hasInsertionPoint() const
@@ -72,7 +73,7 @@ public:
 
     void markEndOfFile()
     {
-        m_last->append(String { &kEndOfFileMarker, 1 });
+        m_last->append(SegmentedString(String(&kEndOfFileMarker, 1)));
         m_last->close();
     }
 
@@ -91,7 +92,8 @@ public:
 
     void splitInto(SegmentedString& next)
     {
-        next = WTFMove(m_first);
+        next = m_first;
+        m_first = SegmentedString();
         if (m_last == &m_first) {
             // We used to only have one SegmentedString in the InputStream
             // but now we have two.  That means m_first is no longer also
index 8a84689..af9fd50 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Google Inc. All Rights Reserved.
- * Copyright (C) 2015-2016 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2015 Apple Inc. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -151,9 +151,9 @@ bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
     // that are disallowed in <head>, we don't bail out until we've checked at
     // least bytesToCheckUnconditionally bytes of input.
 
-    constexpr int bytesToCheckUnconditionally = 1024;
+    static const int bytesToCheckUnconditionally = 1024;
 
-    m_input.append(m_codec->decode(data, length));
+    m_input.append(SegmentedString(m_codec->decode(data, length)));
 
     while (auto token = m_tokenizer.nextToken(m_input)) {
         bool isEnd = token->type() == HTMLToken::EndTag;
index 22dba90..783047b 100644 (file)
 
 namespace WebCore {
 
+HTMLSourceTracker::HTMLSourceTracker()
+{
+}
+
 void HTMLSourceTracker::startToken(SegmentedString& currentInput, HTMLTokenizer& tokenizer)
 {
     if (!m_started) {
@@ -74,12 +78,12 @@ String HTMLSourceTracker::source(const HTMLToken& token)
 
     unsigned i = 0;
     for ( ; i < length && !m_previousSource.isEmpty(); ++i) {
-        source.append(m_previousSource.currentCharacter());
+        source.append(m_previousSource.currentChar());
         m_previousSource.advance();
     }
     for ( ; i < length; ++i) {
         ASSERT(!m_currentSource.isEmpty());
-        source.append(m_currentSource.currentCharacter());
+        source.append(m_currentSource.currentChar());
         m_currentSource.advance();
     }
 
index 3a24cf0..897e83d 100644 (file)
@@ -36,7 +36,7 @@ class HTMLTokenizer;
 class HTMLSourceTracker {
     WTF_MAKE_NONCOPYABLE(HTMLSourceTracker);
 public:
-    HTMLSourceTracker() = default;
+    HTMLSourceTracker();
 
     void startToken(SegmentedString&, HTMLTokenizer&);
     void endToken(SegmentedString&, HTMLTokenizer&);
index 985c618..489e6c5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008-2016 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2008, 2015 Apple Inc. All Rights Reserved.
  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
  *
@@ -31,7 +31,7 @@
 #include "HTMLEntityParser.h"
 #include "HTMLNames.h"
 #include "MarkupTokenizerInlines.h"
-#include <wtf/text/StringBuilder.h>
+#include <wtf/ASCIICType.h>
 
 using namespace WTF;
 
@@ -96,7 +96,7 @@ inline bool HTMLTokenizer::emitAndResumeInDataState(SegmentedString& source)
 {
     saveEndTagNameIfNeeded();
     m_state = DataState;
-    source.advancePastNonNewline();
+    source.advanceAndUpdateLineNumber();
     return true;
 }
 
@@ -157,9 +157,9 @@ void HTMLTokenizer::flushBufferedEndTag()
 
 bool HTMLTokenizer::commitToPartialEndTag(SegmentedString& source, UChar character, State state)
 {
-    ASSERT(source.currentCharacter() == character);
+    ASSERT(source.currentChar() == character);
     appendToTemporaryBuffer(character);
-    source.advancePastNonNewline();
+    source.advanceAndUpdateLineNumber();
 
     if (haveBufferedCharacterToken()) {
         // Emit the buffered character token.
@@ -174,9 +174,9 @@ bool HTMLTokenizer::commitToPartialEndTag(SegmentedString& source, UChar charact
 
 bool HTMLTokenizer::commitToCompleteEndTag(SegmentedString& source)
 {
-    ASSERT(source.currentCharacter() == '>');
+    ASSERT(source.currentChar() == '>');
     appendToTemporaryBuffer('>');
-    source.advancePastNonNewline();
+    source.advance();
 
     m_state = DataState;
 
@@ -212,11 +212,11 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(DataState)
         if (character == '&')
-            ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInDataState);
+            ADVANCE_TO(CharacterReferenceInDataState);
         if (character == '<') {
             if (haveBufferedCharacterToken())
                 RETURN_IN_CURRENT_STATE(true);
-            ADVANCE_PAST_NON_NEWLINE_TO(TagOpenState);
+            ADVANCE_TO(TagOpenState);
         }
         if (character == kEndOfFileMarker)
             return emitEndOfFile(source);
@@ -232,9 +232,9 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(RCDATAState)
         if (character == '&')
-            ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInRCDATAState);
+            ADVANCE_TO(CharacterReferenceInRCDATAState);
         if (character == '<')
-            ADVANCE_PAST_NON_NEWLINE_TO(RCDATALessThanSignState);
+            ADVANCE_TO(RCDATALessThanSignState);
         if (character == kEndOfFileMarker)
             RECONSUME_IN(DataState);
         bufferCharacter(character);
@@ -249,7 +249,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(RAWTEXTState)
         if (character == '<')
-            ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTLessThanSignState);
+            ADVANCE_TO(RAWTEXTLessThanSignState);
         if (character == kEndOfFileMarker)
             RECONSUME_IN(DataState);
         bufferCharacter(character);
@@ -258,7 +258,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(ScriptDataState)
         if (character == '<')
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataLessThanSignState);
+            ADVANCE_TO(ScriptDataLessThanSignState);
         if (character == kEndOfFileMarker)
             RECONSUME_IN(DataState);
         bufferCharacter(character);
@@ -274,12 +274,12 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(TagOpenState)
         if (character == '!')
-            ADVANCE_PAST_NON_NEWLINE_TO(MarkupDeclarationOpenState);
+            ADVANCE_TO(MarkupDeclarationOpenState);
         if (character == '/')
-            ADVANCE_PAST_NON_NEWLINE_TO(EndTagOpenState);
+            ADVANCE_TO(EndTagOpenState);
         if (isASCIIAlpha(character)) {
             m_token.beginStartTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
+            ADVANCE_TO(TagNameState);
         }
         if (character == '?') {
             parseError();
@@ -297,11 +297,11 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             m_token.beginEndTag(convertASCIIAlphaToLower(character));
             m_appropriateEndTagName.clear();
-            ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
+            ADVANCE_TO(TagNameState);
         }
         if (character == '>') {
             parseError();
-            ADVANCE_PAST_NON_NEWLINE_TO(DataState);
+            ADVANCE_TO(DataState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -317,7 +317,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isTokenizerWhitespace(character))
             ADVANCE_TO(BeforeAttributeNameState);
         if (character == '/')
-            ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+            ADVANCE_TO(SelfClosingStartTagState);
         if (character == '>')
             return emitAndResumeInDataState(source);
         if (m_options.usePreHTML5ParserQuirks && character == '<')
@@ -327,14 +327,14 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             RECONSUME_IN(DataState);
         }
         m_token.appendToName(toASCIILower(character));
-        ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
+        ADVANCE_TO(TagNameState);
     END_STATE()
 
     BEGIN_STATE(RCDATALessThanSignState)
         if (character == '/') {
             m_temporaryBuffer.clear();
             ASSERT(m_bufferedEndTagName.isEmpty());
-            ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagOpenState);
+            ADVANCE_TO(RCDATAEndTagOpenState);
         }
         bufferASCIICharacter('<');
         RECONSUME_IN(RCDATAState);
@@ -344,7 +344,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             appendToTemporaryBuffer(character);
             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);
+            ADVANCE_TO(RCDATAEndTagNameState);
         }
         bufferASCIICharacter('<');
         bufferASCIICharacter('/');
@@ -355,7 +355,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             appendToTemporaryBuffer(character);
             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);
+            ADVANCE_TO(RCDATAEndTagNameState);
         }
         if (isTokenizerWhitespace(character)) {
             if (isAppropriateEndTag()) {
@@ -385,7 +385,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '/') {
             m_temporaryBuffer.clear();
             ASSERT(m_bufferedEndTagName.isEmpty());
-            ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagOpenState);
+            ADVANCE_TO(RAWTEXTEndTagOpenState);
         }
         bufferASCIICharacter('<');
         RECONSUME_IN(RAWTEXTState);
@@ -395,7 +395,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             appendToTemporaryBuffer(character);
             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);
+            ADVANCE_TO(RAWTEXTEndTagNameState);
         }
         bufferASCIICharacter('<');
         bufferASCIICharacter('/');
@@ -406,7 +406,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             appendToTemporaryBuffer(character);
             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);
+            ADVANCE_TO(RAWTEXTEndTagNameState);
         }
         if (isTokenizerWhitespace(character)) {
             if (isAppropriateEndTag()) {
@@ -436,12 +436,12 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '/') {
             m_temporaryBuffer.clear();
             ASSERT(m_bufferedEndTagName.isEmpty());
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagOpenState);
+            ADVANCE_TO(ScriptDataEndTagOpenState);
         }
         if (character == '!') {
             bufferASCIICharacter('<');
             bufferASCIICharacter('!');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartState);
+            ADVANCE_TO(ScriptDataEscapeStartState);
         }
         bufferASCIICharacter('<');
         RECONSUME_IN(ScriptDataState);
@@ -451,7 +451,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             appendToTemporaryBuffer(character);
             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);
+            ADVANCE_TO(ScriptDataEndTagNameState);
         }
         bufferASCIICharacter('<');
         bufferASCIICharacter('/');
@@ -462,7 +462,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             appendToTemporaryBuffer(character);
             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);
+            ADVANCE_TO(ScriptDataEndTagNameState);
         }
         if (isTokenizerWhitespace(character)) {
             if (isAppropriateEndTag()) {
@@ -491,7 +491,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(ScriptDataEscapeStartState)
         if (character == '-') {
             bufferASCIICharacter('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartDashState);
+            ADVANCE_TO(ScriptDataEscapeStartDashState);
         } else
             RECONSUME_IN(ScriptDataState);
     END_STATE()
@@ -499,7 +499,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(ScriptDataEscapeStartDashState)
         if (character == '-') {
             bufferASCIICharacter('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
+            ADVANCE_TO(ScriptDataEscapedDashDashState);
         } else
             RECONSUME_IN(ScriptDataState);
     END_STATE()
@@ -507,10 +507,10 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(ScriptDataEscapedState)
         if (character == '-') {
             bufferASCIICharacter('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashState);
+            ADVANCE_TO(ScriptDataEscapedDashState);
         }
         if (character == '<')
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
+            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
         if (character == kEndOfFileMarker) {
             parseError();
             RECONSUME_IN(DataState);
@@ -522,10 +522,10 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(ScriptDataEscapedDashState)
         if (character == '-') {
             bufferASCIICharacter('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
+            ADVANCE_TO(ScriptDataEscapedDashDashState);
         }
         if (character == '<')
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
+            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
         if (character == kEndOfFileMarker) {
             parseError();
             RECONSUME_IN(DataState);
@@ -537,13 +537,13 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(ScriptDataEscapedDashDashState)
         if (character == '-') {
             bufferASCIICharacter('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
+            ADVANCE_TO(ScriptDataEscapedDashDashState);
         }
         if (character == '<')
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
+            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
         if (character == '>') {
             bufferASCIICharacter('>');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState);
+            ADVANCE_TO(ScriptDataState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -557,14 +557,14 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '/') {
             m_temporaryBuffer.clear();
             ASSERT(m_bufferedEndTagName.isEmpty());
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagOpenState);
+            ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
         }
         if (isASCIIAlpha(character)) {
             bufferASCIICharacter('<');
             bufferASCIICharacter(character);
             m_temporaryBuffer.clear();
             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);
+            ADVANCE_TO(ScriptDataDoubleEscapeStartState);
         }
         bufferASCIICharacter('<');
         RECONSUME_IN(ScriptDataEscapedState);
@@ -574,7 +574,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             appendToTemporaryBuffer(character);
             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);
+            ADVANCE_TO(ScriptDataEscapedEndTagNameState);
         }
         bufferASCIICharacter('<');
         bufferASCIICharacter('/');
@@ -585,7 +585,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             appendToTemporaryBuffer(character);
             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);
+            ADVANCE_TO(ScriptDataEscapedEndTagNameState);
         }
         if (isTokenizerWhitespace(character)) {
             if (isAppropriateEndTag()) {
@@ -622,7 +622,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             bufferASCIICharacter(character);
             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);
+            ADVANCE_TO(ScriptDataDoubleEscapeStartState);
         }
         RECONSUME_IN(ScriptDataEscapedState);
     END_STATE()
@@ -630,11 +630,11 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(ScriptDataDoubleEscapedState)
         if (character == '-') {
             bufferASCIICharacter('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashState);
+            ADVANCE_TO(ScriptDataDoubleEscapedDashState);
         }
         if (character == '<') {
             bufferASCIICharacter('<');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
+            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -647,11 +647,11 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(ScriptDataDoubleEscapedDashState)
         if (character == '-') {
             bufferASCIICharacter('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);
+            ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
         }
         if (character == '<') {
             bufferASCIICharacter('<');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
+            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -664,15 +664,15 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(ScriptDataDoubleEscapedDashDashState)
         if (character == '-') {
             bufferASCIICharacter('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);
+            ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
         }
         if (character == '<') {
             bufferASCIICharacter('<');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
+            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
         }
         if (character == '>') {
             bufferASCIICharacter('>');
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState);
+            ADVANCE_TO(ScriptDataState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -686,7 +686,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '/') {
             bufferASCIICharacter('/');
             m_temporaryBuffer.clear();
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);
+            ADVANCE_TO(ScriptDataDoubleEscapeEndState);
         }
         RECONSUME_IN(ScriptDataDoubleEscapedState);
     END_STATE()
@@ -702,7 +702,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isASCIIAlpha(character)) {
             bufferASCIICharacter(character);
             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
-            ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);
+            ADVANCE_TO(ScriptDataDoubleEscapeEndState);
         }
         RECONSUME_IN(ScriptDataDoubleEscapedState);
     END_STATE()
@@ -711,7 +711,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isTokenizerWhitespace(character))
             ADVANCE_TO(BeforeAttributeNameState);
         if (character == '/')
-            ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+            ADVANCE_TO(SelfClosingStartTagState);
         if (character == '>')
             return emitAndResumeInDataState(source);
         if (m_options.usePreHTML5ParserQuirks && character == '<')
@@ -724,16 +724,16 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             parseError();
         m_token.beginAttribute(source.numberOfCharactersConsumed());
         m_token.appendToAttributeName(toASCIILower(character));
-        ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
+        ADVANCE_TO(AttributeNameState);
     END_STATE()
 
     BEGIN_STATE(AttributeNameState)
         if (isTokenizerWhitespace(character))
             ADVANCE_TO(AfterAttributeNameState);
         if (character == '/')
-            ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+            ADVANCE_TO(SelfClosingStartTagState);
         if (character == '=')
-            ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState);
+            ADVANCE_TO(BeforeAttributeValueState);
         if (character == '>')
             return emitAndResumeInDataState(source);
         if (m_options.usePreHTML5ParserQuirks && character == '<')
@@ -745,16 +745,16 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '"' || character == '\'' || character == '<' || character == '=')
             parseError();
         m_token.appendToAttributeName(toASCIILower(character));
-        ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
+        ADVANCE_TO(AttributeNameState);
     END_STATE()
 
     BEGIN_STATE(AfterAttributeNameState)
         if (isTokenizerWhitespace(character))
             ADVANCE_TO(AfterAttributeNameState);
         if (character == '/')
-            ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+            ADVANCE_TO(SelfClosingStartTagState);
         if (character == '=')
-            ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState);
+            ADVANCE_TO(BeforeAttributeValueState);
         if (character == '>')
             return emitAndResumeInDataState(source);
         if (m_options.usePreHTML5ParserQuirks && character == '<')
@@ -767,18 +767,18 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             parseError();
         m_token.beginAttribute(source.numberOfCharactersConsumed());
         m_token.appendToAttributeName(toASCIILower(character));
-        ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
+        ADVANCE_TO(AttributeNameState);
     END_STATE()
 
     BEGIN_STATE(BeforeAttributeValueState)
         if (isTokenizerWhitespace(character))
             ADVANCE_TO(BeforeAttributeValueState);
         if (character == '"')
-            ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueDoubleQuotedState);
+            ADVANCE_TO(AttributeValueDoubleQuotedState);
         if (character == '&')
             RECONSUME_IN(AttributeValueUnquotedState);
         if (character == '\'')
-            ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueSingleQuotedState);
+            ADVANCE_TO(AttributeValueSingleQuotedState);
         if (character == '>') {
             parseError();
             return emitAndResumeInDataState(source);
@@ -790,17 +790,17 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '<' || character == '=' || character == '`')
             parseError();
         m_token.appendToAttributeValue(character);
-        ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);
+        ADVANCE_TO(AttributeValueUnquotedState);
     END_STATE()
 
     BEGIN_STATE(AttributeValueDoubleQuotedState)
         if (character == '"') {
             m_token.endAttribute(source.numberOfCharactersConsumed());
-            ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);
+            ADVANCE_TO(AfterAttributeValueQuotedState);
         }
         if (character == '&') {
             m_additionalAllowedCharacter = '"';
-            ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
+            ADVANCE_TO(CharacterReferenceInAttributeValueState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -814,11 +814,11 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
     BEGIN_STATE(AttributeValueSingleQuotedState)
         if (character == '\'') {
             m_token.endAttribute(source.numberOfCharactersConsumed());
-            ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);
+            ADVANCE_TO(AfterAttributeValueQuotedState);
         }
         if (character == '&') {
             m_additionalAllowedCharacter = '\'';
-            ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
+            ADVANCE_TO(CharacterReferenceInAttributeValueState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -836,7 +836,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         }
         if (character == '&') {
             m_additionalAllowedCharacter = '>';
-            ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
+            ADVANCE_TO(CharacterReferenceInAttributeValueState);
         }
         if (character == '>') {
             m_token.endAttribute(source.numberOfCharactersConsumed());
@@ -850,7 +850,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '"' || character == '\'' || character == '<' || character == '=' || character == '`')
             parseError();
         m_token.appendToAttributeValue(character);
-        ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);
+        ADVANCE_TO(AttributeValueUnquotedState);
     END_STATE()
 
     BEGIN_STATE(CharacterReferenceInAttributeValueState)
@@ -882,7 +882,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (isTokenizerWhitespace(character))
             ADVANCE_TO(BeforeAttributeNameState);
         if (character == '/')
-            ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+            ADVANCE_TO(SelfClosingStartTagState);
         if (character == '>')
             return emitAndResumeInDataState(source);
         if (m_options.usePreHTML5ParserQuirks && character == '<')
@@ -932,7 +932,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             if (result == SegmentedString::NotEnoughCharacters)
                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
         } else if (isASCIIAlphaCaselessEqual(character, 'd')) {
-            auto result = source.advancePastLettersIgnoringASCIICase("doctype");
+            auto result = source.advancePastIgnoringCase("doctype");
             if (result == SegmentedString::DidMatch)
                 SWITCH_TO(DOCTYPEState);
             if (result == SegmentedString::NotEnoughCharacters)
@@ -950,7 +950,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(CommentStartState)
         if (character == '-')
-            ADVANCE_PAST_NON_NEWLINE_TO(CommentStartDashState);
+            ADVANCE_TO(CommentStartDashState);
         if (character == '>') {
             parseError();
             return emitAndResumeInDataState(source);
@@ -965,7 +965,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(CommentStartDashState)
         if (character == '-')
-            ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
+            ADVANCE_TO(CommentEndState);
         if (character == '>') {
             parseError();
             return emitAndResumeInDataState(source);
@@ -981,7 +981,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(CommentState)
         if (character == '-')
-            ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState);
+            ADVANCE_TO(CommentEndDashState);
         if (character == kEndOfFileMarker) {
             parseError();
             return emitAndReconsumeInDataState();
@@ -992,7 +992,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(CommentEndDashState)
         if (character == '-')
-            ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
+            ADVANCE_TO(CommentEndState);
         if (character == kEndOfFileMarker) {
             parseError();
             return emitAndReconsumeInDataState();
@@ -1007,12 +1007,12 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             return emitAndResumeInDataState(source);
         if (character == '!') {
             parseError();
-            ADVANCE_PAST_NON_NEWLINE_TO(CommentEndBangState);
+            ADVANCE_TO(CommentEndBangState);
         }
         if (character == '-') {
             parseError();
             m_token.appendToComment('-');
-            ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
+            ADVANCE_TO(CommentEndState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -1030,7 +1030,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             m_token.appendToComment('-');
             m_token.appendToComment('-');
             m_token.appendToComment('!');
-            ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState);
+            ADVANCE_TO(CommentEndDashState);
         }
         if (character == '>')
             return emitAndResumeInDataState(source);
@@ -1074,7 +1074,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             return emitAndReconsumeInDataState();
         }
         m_token.beginDOCTYPE(toASCIILower(character));
-        ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState);
+        ADVANCE_TO(DOCTYPENameState);
     END_STATE()
 
     BEGIN_STATE(DOCTYPENameState)
@@ -1088,7 +1088,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             return emitAndReconsumeInDataState();
         }
         m_token.appendToName(toASCIILower(character));
-        ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState);
+        ADVANCE_TO(DOCTYPENameState);
     END_STATE()
 
     BEGIN_STATE(AfterDOCTYPENameState)
@@ -1102,13 +1102,13 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             return emitAndReconsumeInDataState();
         }
         if (isASCIIAlphaCaselessEqual(character, 'p')) {
-            auto result = source.advancePastLettersIgnoringASCIICase("public");
+            auto result = source.advancePastIgnoringCase("public");
             if (result == SegmentedString::DidMatch)
                 SWITCH_TO(AfterDOCTYPEPublicKeywordState);
             if (result == SegmentedString::NotEnoughCharacters)
                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
         } else if (isASCIIAlphaCaselessEqual(character, 's')) {
-            auto result = source.advancePastLettersIgnoringASCIICase("system");
+            auto result = source.advancePastIgnoringCase("system");
             if (result == SegmentedString::DidMatch)
                 SWITCH_TO(AfterDOCTYPESystemKeywordState);
             if (result == SegmentedString::NotEnoughCharacters)
@@ -1116,7 +1116,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         }
         parseError();
         m_token.setForceQuirks();
-        ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+        ADVANCE_TO(BogusDOCTYPEState);
     END_STATE()
 
     BEGIN_STATE(AfterDOCTYPEPublicKeywordState)
@@ -1125,12 +1125,12 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '"') {
             parseError();
             m_token.setPublicIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+            ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
         }
         if (character == '\'') {
             parseError();
             m_token.setPublicIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+            ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
         }
         if (character == '>') {
             parseError();
@@ -1144,7 +1144,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         }
         parseError();
         m_token.setForceQuirks();
-        ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+        ADVANCE_TO(BogusDOCTYPEState);
     END_STATE()
 
     BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState)
@@ -1152,11 +1152,11 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
         if (character == '"') {
             m_token.setPublicIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+            ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
         }
         if (character == '\'') {
             m_token.setPublicIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+            ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
         }
         if (character == '>') {
             parseError();
@@ -1170,12 +1170,12 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         }
         parseError();
         m_token.setForceQuirks();
-        ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+        ADVANCE_TO(BogusDOCTYPEState);
     END_STATE()
 
     BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState)
         if (character == '"')
-            ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);
+            ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
         if (character == '>') {
             parseError();
             m_token.setForceQuirks();
@@ -1192,7 +1192,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState)
         if (character == '\'')
-            ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);
+            ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
         if (character == '>') {
             parseError();
             m_token.setForceQuirks();
@@ -1215,12 +1215,12 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '"') {
             parseError();
             m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
         }
         if (character == '\'') {
             parseError();
             m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -1229,7 +1229,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         }
         parseError();
         m_token.setForceQuirks();
-        ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+        ADVANCE_TO(BogusDOCTYPEState);
     END_STATE()
 
     BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState)
@@ -1239,11 +1239,11 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             return emitAndResumeInDataState(source);
         if (character == '"') {
             m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
         }
         if (character == '\'') {
             m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
         }
         if (character == kEndOfFileMarker) {
             parseError();
@@ -1252,7 +1252,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         }
         parseError();
         m_token.setForceQuirks();
-        ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+        ADVANCE_TO(BogusDOCTYPEState);
     END_STATE()
 
     BEGIN_STATE(AfterDOCTYPESystemKeywordState)
@@ -1261,12 +1261,12 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         if (character == '"') {
             parseError();
             m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
         }
         if (character == '\'') {
             parseError();
             m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
         }
         if (character == '>') {
             parseError();
@@ -1280,7 +1280,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         }
         parseError();
         m_token.setForceQuirks();
-        ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+        ADVANCE_TO(BogusDOCTYPEState);
     END_STATE()
 
     BEGIN_STATE(BeforeDOCTYPESystemIdentifierState)
@@ -1288,11 +1288,11 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
         if (character == '"') {
             m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
         }
         if (character == '\'') {
             m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
         }
         if (character == '>') {
             parseError();
@@ -1306,12 +1306,12 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
         }
         parseError();
         m_token.setForceQuirks();
-        ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+        ADVANCE_TO(BogusDOCTYPEState);
     END_STATE()
 
     BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState)
         if (character == '"')
-            ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);
+            ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
         if (character == '>') {
             parseError();
             m_token.setForceQuirks();
@@ -1328,7 +1328,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState)
         if (character == '\'')
-            ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);
+            ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
         if (character == '>') {
             parseError();
             m_token.setForceQuirks();
@@ -1354,7 +1354,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
             return emitAndReconsumeInDataState();
         }
         parseError();
-        ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+        ADVANCE_TO(BogusDOCTYPEState);
     END_STATE()
 
     BEGIN_STATE(BogusDOCTYPEState)
@@ -1367,7 +1367,7 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(CDATASectionState)
         if (character == ']')
-            ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionRightSquareBracketState);
+            ADVANCE_TO(CDATASectionRightSquareBracketState);
         if (character == kEndOfFileMarker)
             RECONSUME_IN(DataState);
         bufferCharacter(character);
@@ -1376,14 +1376,14 @@ bool HTMLTokenizer::processToken(SegmentedString& source)
 
     BEGIN_STATE(CDATASectionRightSquareBracketState)
         if (character == ']')
-            ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionDoubleRightSquareBracketState);
+            ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
         bufferASCIICharacter(']');
         RECONSUME_IN(CDATASectionState);
     END_STATE()
 
     BEGIN_STATE(CDATASectionDoubleRightSquareBracketState)
         if (character == '>')
-            ADVANCE_PAST_NON_NEWLINE_TO(DataState);
+            ADVANCE_TO(DataState);
         bufferASCIICharacter(']');
         bufferASCIICharacter(']');
         RECONSUME_IN(CDATASectionState);
index 361f653..d5fb02f 100644 (file)
 #pragma once
 
 #include "SegmentedString.h"
+#include <wtf/Noncopyable.h>
 #include <wtf/unicode/CharacterNames.h>
 
 namespace WebCore {
 
+const LChar kEndOfFileMarker = 0;
+
 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
 template <typename Tokenizer>
 class InputStreamPreprocessor {
+    WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
 public:
     explicit InputStreamPreprocessor(Tokenizer& tokenizer)
         : m_tokenizer(tokenizer)
     {
+        reset();
     }
 
     ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; }
@@ -48,68 +53,75 @@ public:
     // characters in |source| (after collapsing \r\n, etc).
     ALWAYS_INLINE bool peek(SegmentedString& source, bool skipNullCharacters = false)
     {
-        if (UNLIKELY(source.isEmpty()))
+        if (source.isEmpty())
             return false;
 
-        m_nextInputCharacter = source.currentCharacter();
+        m_nextInputCharacter = source.currentChar();
 
         // Every branch in this function is expensive, so we have a
         // fast-reject branch for characters that don't require special
         // handling. Please run the parser benchmark whenever you touch
         // this function. It's very hot.
-        constexpr UChar specialCharacterMask = '\n' | '\r' | '\0';
-        if (LIKELY(m_nextInputCharacter & ~specialCharacterMask)) {
+        static const UChar specialCharacterMask = '\n' | '\r' | '\0';
+        if (m_nextInputCharacter & ~specialCharacterMask) {
             m_skipNextNewLine = false;
             return true;
         }
-
         return processNextInputCharacter(source, skipNullCharacters);
     }
 
     // Returns whether there are more characters in |source| after advancing.
     ALWAYS_INLINE bool advance(SegmentedString& source, bool skipNullCharacters = false)
     {
-        source.advance();
+        source.advanceAndUpdateLineNumber();
         return peek(source, skipNullCharacters);
     }
-    ALWAYS_INLINE bool advancePastNonNewline(SegmentedString& source, bool skipNullCharacters = false)
+
+    bool skipNextNewLine() const { return m_skipNextNewLine; }
+
+    void reset(bool skipNextNewLine = false)
     {
-        source.advancePastNonNewline();
-        return peek(source, skipNullCharacters);
+        m_nextInputCharacter = '\0';
+        m_skipNextNewLine = skipNextNewLine;
     }
 
 private:
     bool processNextInputCharacter(SegmentedString& source, bool skipNullCharacters)
     {
     ProcessAgain:
-        ASSERT(m_nextInputCharacter == source.currentCharacter());
+        ASSERT(m_nextInputCharacter == source.currentChar());
+
         if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
             m_skipNextNewLine = false;
-            source.advancePastNewline();
+            source.advancePastNewlineAndUpdateLineNumber();
             if (source.isEmpty())
                 return false;
-            m_nextInputCharacter = source.currentCharacter();
+            m_nextInputCharacter = source.currentChar();
         }
         if (m_nextInputCharacter == '\r') {
             m_nextInputCharacter = '\n';
             m_skipNextNewLine = true;
-            return true;
-        }
-        m_skipNextNewLine = false;
-        if (m_nextInputCharacter || isAtEndOfFile(source))
-            return true;
-        if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) {
-            source.advancePastNonNewline();
-            if (source.isEmpty())
-                return false;
-            m_nextInputCharacter = source.currentCharacter();
-            goto ProcessAgain;
+        } else {
+            m_skipNextNewLine = false;
+            // FIXME: The spec indicates that the surrogate pair range as well as
+            // a number of specific character values are parse errors and should be replaced
+            // by the replacement character. We suspect this is a problem with the spec as doing
+            // that filtering breaks surrogate pair handling and causes us not to match Minefield.
+            if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
+                if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) {
+                    source.advancePastNonNewline();
+                    if (source.isEmpty())
+                        return false;
+                    m_nextInputCharacter = source.currentChar();
+                    goto ProcessAgain;
+                }
+                m_nextInputCharacter = replacementCharacter;
+            }
         }
-        m_nextInputCharacter = replacementCharacter;
         return true;
     }
 
-    static bool isAtEndOfFile(SegmentedString& source)
+    bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const
     {
         return source.isClosed() && source.length() == 1;
     }
@@ -117,8 +129,8 @@ private:
     Tokenizer& m_tokenizer;
 
     // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
-    UChar m_nextInputCharacter { 0 };
-    bool m_skipNextNewLine { false };
+    UChar m_nextInputCharacter;
+    bool m_skipNextNewLine;
 };
 
 } // namespace WebCore
index 8f23625..c23924b 100644 (file)
 
 namespace WebCore {
 
-std::optional<String> BufferedLineReader::nextLine()
+bool BufferedLineReader::getLine(String& line)
 {
     if (m_maybeSkipLF) {
         // We ran out of data after a CR (U+000D), which means that we may be
         // in the middle of a CRLF pair. If the next character is a LF (U+000A)
         // then skip it, and then (unconditionally) return the buffered line.
         if (!m_buffer.isEmpty()) {
-            if (m_buffer.currentCharacter() == newlineCharacter)
-                m_buffer.advancePastNewline();
+            scanCharacter(newlineCharacter);
             m_maybeSkipLF = false;
         }
         // If there was no (new) data available, then keep m_maybeSkipLF set,
-        // and fall through all the way down to the EOS check at the end of the function.
+        // and fall through all the way down to the EOS check at the end of
+        // the method.
     }
 
     bool shouldReturnLine = false;
     bool checkForLF = false;
     while (!m_buffer.isEmpty()) {
-        UChar character = m_buffer.currentCharacter();
+        UChar c = m_buffer.currentChar();
         m_buffer.advance();
 
-        if (character == newlineCharacter || character == carriageReturn) {
+        if (c == newlineCharacter || c == carriageReturn) {
             // We found a line ending. Return the accumulated line.
             shouldReturnLine = true;
-            checkForLF = (character == carriageReturn);
+            checkForLF = (c == carriageReturn);
             break;
         }
 
         // NULs are transformed into U+FFFD (REPLACEMENT CHAR.) in step 1 of
         // the WebVTT parser algorithm.
-        if (character == '\0')
-            character = replacementCharacter;
+        if (c == '\0')
+            c = replacementCharacter;
 
-        m_lineBuffer.append(character);
+        m_lineBuffer.append(c);
     }
 
     if (checkForLF) {
         // May be in the middle of a CRLF pair.
         if (!m_buffer.isEmpty()) {
-            if (m_buffer.currentCharacter() == newlineCharacter)
-                m_buffer.advancePastNewline();
+            // Scan a potential newline character.
+            scanCharacter(newlineCharacter);
         } else {
-            // Check for the newline on the next call (unless we reached EOS, in
+            // Check for the LF on the next call (unless we reached EOS, in
             // which case we'll return the contents of the line buffer, and
             // reset state for the next line.)
             m_maybeSkipLF = true;
@@ -92,13 +92,13 @@ std::optional<String> BufferedLineReader::nextLine()
     }
 
     if (shouldReturnLine) {
-        auto line = m_lineBuffer.toString();
+        line = m_lineBuffer.toString();
         m_lineBuffer.clear();
-        return WTFMove(line);
+        return true;
     }
 
     ASSERT(m_buffer.isEmpty());
-    return std::nullopt;
+    return false;
 }
 
 } // namespace WebCore
index 9334ae7..9494910 100644 (file)
@@ -38,39 +38,50 @@ namespace WebCore {
 // Line collection helper for the WebVTT Parser.
 //
 // Converts a stream of data (== a sequence of Strings) into a set of
-// lines. CR, LR or CRLF are considered line breaks. Normalizes NULs (U+0000)
-// to 'REPLACEMENT CHARACTER' (U+FFFD) and does not return the line breaks as
+// lines. CR, LF or CRLF are considered linebreaks. Normalizes NULs (U+0000)
+// to 'REPLACEMENT CHARACTER' (U+FFFD) and does not return the linebreaks as
 // part of the result.
 class BufferedLineReader {
     WTF_MAKE_NONCOPYABLE(BufferedLineReader);
 public:
-    BufferedLineReader() = default;
-    void reset();
+    BufferedLineReader()
+        : m_endOfStream(false)
+        , m_maybeSkipLF(false) { }
 
-    void append(String&& data)
+    // Append data to the internal buffer.
+    void append(const String& data)
     {
         ASSERT(!m_endOfStream);
-        m_buffer.append(WTFMove(data));
+        m_buffer.append(SegmentedString(data));
     }
 
-    void appendEndOfStream() { m_endOfStream = true; }
+    // Indicate that no more data will be appended. This will cause any
+    // potentially "unterminated" line to be returned from getLine.
+    void setEndOfStream() { m_endOfStream = true; }
+
+    // Attempt to read a line from the internal buffer (fed via append).
+    // If successful, true is returned and |line| is set to the line that was
+    // read. If no line could be read false is returned.
+    bool getLine(String& line);
+
+    // Returns true once end of stream has been signaled and the buffer is empty.
     bool isAtEndOfStream() const { return m_endOfStream && m_buffer.isEmpty(); }
 
-    std::optional<String> nextLine();
+    void reset() { m_buffer.clear(); }
 
 private:
+    // Consume the next character from the buffer if it is the character |c|.
+    void scanCharacter(UChar c)
+    {
+        ASSERT(!m_buffer.isEmpty());
+        if (m_buffer.currentChar() == c)
+            m_buffer.advance();
+    }
+
     SegmentedString m_buffer;
     StringBuilder m_lineBuffer;
-    bool m_endOfStream { false };
-    bool m_maybeSkipLF { false };
+    bool m_endOfStream;
+    bool m_maybeSkipLF;
 };
 
-inline void BufferedLineReader::reset()
-{
-    m_buffer.clear();
-    m_lineBuffer.clear();
-    m_endOfStream = false;
-    m_maybeSkipLF = false;
-}
-
 } // namespace WebCore
index 091a5d2..1b84466 100644 (file)
@@ -185,10 +185,10 @@ void InbandGenericTextTrack::parseWebVTTCueData(InbandTextTrackPrivate* trackPri
     parser().parseCueData(cueData);
 }
 
-void InbandGenericTextTrack::parseWebVTTFileHeader(InbandTextTrackPrivate* trackPrivate, String&& header)
+void InbandGenericTextTrack::parseWebVTTFileHeader(InbandTextTrackPrivate* trackPrivate, String header)
 {
     ASSERT_UNUSED(trackPrivate, trackPrivate == m_private);
-    parser().parseFileHeader(WTFMove(header));
+    parser().parseFileHeader(header);
 }
 
 void InbandGenericTextTrack::newCuesParsed()
index 1c348b2..4f302c6 100644 (file)
@@ -72,7 +72,7 @@ private:
 
     WebVTTParser& parser();
     void parseWebVTTCueData(InbandTextTrackPrivate*, const ISOWebVTTCue&) final;
-    void parseWebVTTFileHeader(InbandTextTrackPrivate*, String&&) final;
+    void parseWebVTTFileHeader(InbandTextTrackPrivate*, String) final;
 
     void newCuesParsed() final;
     void newRegionsParsed() final;
index 9cd027c..e5f8d2b 100644 (file)
@@ -79,7 +79,7 @@ private:
     void updateGenericCue(InbandTextTrackPrivate*, GenericCueData*) override { ASSERT_NOT_REACHED(); }
     void removeGenericCue(InbandTextTrackPrivate*, GenericCueData*) override { ASSERT_NOT_REACHED(); }
 
-    void parseWebVTTFileHeader(InbandTextTrackPrivate*, String&&) override { ASSERT_NOT_REACHED(); }
+    void parseWebVTTFileHeader(InbandTextTrackPrivate*, String) override { ASSERT_NOT_REACHED(); }
     void parseWebVTTCueData(InbandTextTrackPrivate*, const char*, unsigned) override { ASSERT_NOT_REACHED(); }
     void parseWebVTTCueData(InbandTextTrackPrivate*, const ISOWebVTTCue&) override { ASSERT_NOT_REACHED(); }
 
index 6461101..3f785f8 100644 (file)
@@ -104,23 +104,24 @@ void WebVTTParser::getNewRegions(Vector<RefPtr<VTTRegion>>& outputRegions)
     m_regionList.clear();
 }
 
-void WebVTTParser::parseFileHeader(String&& data)
+void WebVTTParser::parseFileHeader(const String& data)
 {
     m_state = Initial;
     m_lineReader.reset();
-    m_lineReader.append(WTFMove(data));
+    m_lineReader.append(data);
     parse();
 }
 
 void WebVTTParser::parseBytes(const char* data, unsigned length)
 {
-    m_lineReader.append(m_decoder->decode(data, length));
+    String textData = m_decoder->decode(data, length);
+    m_lineReader.append(textData);
     parse();
 }
 
 void WebVTTParser::parseCueData(const ISOWebVTTCue& data)
 {
-    auto cue = WebVTTCueData::create();
+    RefPtr<WebVTTCueData> cue = WebVTTCueData::create();
 
     MediaTime startTime = data.presentationTime();
     cue->setStartTime(startTime);
@@ -134,15 +135,16 @@ void WebVTTParser::parseCueData(const ISOWebVTTCue& data)
     if (WebVTTParser::collectTimeStamp(data.originalStartTime(), originalStartTime))
         cue->setOriginalStartTime(originalStartTime);
 
-    m_cuelist.append(WTFMove(cue));
+    m_cuelist.append(cue);
     if (m_client)
         m_client->newCuesParsed();
 }
 
 void WebVTTParser::flush()
 {
-    m_lineReader.append(m_decoder->flush());
-    m_lineReader.appendEndOfStream();
+    String textData = m_decoder->flush();
+    m_lineReader.append(textData);
+    m_lineReader.setEndOfStream();
     parse();
     flushPendingCue();
 }
@@ -151,11 +153,15 @@ void WebVTTParser::parse()
 {    
     // WebVTT parser algorithm. (5.1 WebVTT file parsing.)
     // Steps 1 - 3 - Initial setup.
-    while (auto line = m_lineReader.nextLine()) {
+    String line;
+    while (m_lineReader.getLine(line)) {
+        if (line.isNull())
+            return;
+
         switch (m_state) {
         case Initial:
             // Steps 4 - 9 - Check for a valid WebVTT signature.
-            if (!hasRequiredFileIdentifier(*line)) {
+            if (!hasRequiredFileIdentifier(line)) {
                 if (m_client)
                     m_client->fileFailedToParse();
                 return;
@@ -165,9 +171,9 @@ void WebVTTParser::parse()
             break;
 
         case Header:
-            collectMetadataHeader(*line);
+            collectMetadataHeader(line);
 
-            if (line->isEmpty()) {
+            if (line.isEmpty()) {
                 // Steps 10-14 - Allow a header (comment area) under the WEBVTT line.
                 if (m_client && m_regionList.size())
                     m_client->newRegionsParsed();
@@ -175,43 +181,43 @@ void WebVTTParser::parse()
                 break;
             }
             // Step 15 - Break out of header loop if the line could be a timestamp line.
-            if (line->contains("-->"))
-                m_state = recoverCue(*line);
+            if (line.contains("-->"))
+                m_state = recoverCue(line);
 
             // Step 16 - Line is not the empty string and does not contain "-->".
             break;
 
         case Id:
             // Steps 17 - 20 - Allow any number of line terminators, then initialize new cue values.
-            if (line->isEmpty())
+            if (line.isEmpty())
                 break;
 
             // Step 21 - Cue creation (start a new cue).
             resetCueValues();
 
             // Steps 22 - 25 - Check if this line contains an optional identifier or timing data.
-            m_state = collectCueId(*line);
+            m_state = collectCueId(line);
             break;
 
         case TimingsAndSettings:
             // Steps 26 - 27 - Discard current cue if the line is empty.
-            if (line->isEmpty()) {
+            if (line.isEmpty()) {
                 m_state = Id;
                 break;
             }
 
             // Steps 28 - 29 - Collect cue timings and settings.
-            m_state = collectTimingsAndSettings(*line);
+            m_state = collectTimingsAndSettings(line);
             break;
 
         case CueText:
             // Steps 31 - 41 - Collect the cue text, create a cue, and add it to the output.
-            m_state = collectCueText(*line);
+            m_state = collectCueText(line);
             break;
 
         case BadCue:
             // Steps 42 - 48 - Discard lines until an empty line or a potential timing line is seen.
-            m_state = ignoreBadCue(*line);
+            m_state = ignoreBadCue(line);
             break;
 
         case Finished:
index 69ac055..734e204 100644 (file)
@@ -133,7 +133,7 @@ public:
 
     // Input data to the parser to parse.
     void parseBytes(const char*, unsigned);
-    void parseFileHeader(String&&);
+    void parseFileHeader(const String&);
     void parseCueData(const ISOWebVTTCue&);
     void flush();
     void fileFinished();
index 024c04a..0cb19e8 100644 (file)
  */
 
 #include "config.h"
-#include "WebVTTTokenizer.h"
 
 #if ENABLE(VIDEO_TRACK)
 
+#include "WebVTTTokenizer.h"
+
 #include "MarkupTokenizerInlines.h"
 #include <wtf/text/StringBuilder.h>
 #include <wtf/unicode/CharacterNames.h>
@@ -47,7 +48,7 @@ namespace WebCore {
         character = m_preprocessor.nextInputCharacter();    \
         goto stateName;                                     \
     } while (false)
-
+    
 template<unsigned charactersCount> ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)[charactersCount])
 {
     return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersCount - 1);
@@ -68,7 +69,7 @@ inline bool emitToken(WebVTTToken& resultToken, const WebVTTToken& token)
 
 inline bool advanceAndEmitToken(SegmentedString& source, WebVTTToken& resultToken, const WebVTTToken& token)
 {
-    source.advance();
+    source.advanceAndUpdateLineNumber();
     return emitToken(resultToken, token);
 }
 
@@ -78,7 +79,7 @@ WebVTTTokenizer::WebVTTTokenizer(const String& input)
 {
     // Append an EOF marker and close the input "stream".
     ASSERT(!m_input.isClosed());
-    m_input.append(String { &kEndOfFileMarker, 1 });
+    m_input.append(SegmentedString(String(&kEndOfFileMarker, 1)));
     m_input.close();
 }
 
index 760f679..be252e4 100644 (file)
@@ -180,7 +180,7 @@ public:
     virtual void updateGenericCue(InbandTextTrackPrivate*, GenericCueData*) = 0;
     virtual void removeGenericCue(InbandTextTrackPrivate*, GenericCueData*) = 0;
 
-    virtual void parseWebVTTFileHeader(InbandTextTrackPrivate*, String&&) { ASSERT_NOT_REACHED(); }
+    virtual void parseWebVTTFileHeader(InbandTextTrackPrivate*, String) { ASSERT_NOT_REACHED(); }
     virtual void parseWebVTTCueData(InbandTextTrackPrivate*, const char* data, unsigned length) = 0;
     virtual void parseWebVTTCueData(InbandTextTrackPrivate*, const ISOWebVTTCue&) = 0;
 };
index ff31b2a..491d16c 100644 (file)
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2004-2016 Apple Inc. All rights reserved.
+    Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
 #include "config.h"
 #include "SegmentedString.h"
 
-#include <wtf/text/StringBuilder.h>
 #include <wtf/text/TextPosition.h>
 
 namespace WebCore {
 
-inline void SegmentedString::Substring::appendTo(StringBuilder& builder) const
+SegmentedString::SegmentedString(const SegmentedString& other)
+    : m_pushedChar1(other.m_pushedChar1)
+    , m_pushedChar2(other.m_pushedChar2)
+    , m_currentString(other.m_currentString)
+    , m_numberOfCharactersConsumedPriorToCurrentString(other.m_numberOfCharactersConsumedPriorToCurrentString)
+    , m_numberOfCharactersConsumedPriorToCurrentLine(other.m_numberOfCharactersConsumedPriorToCurrentLine)
+    , m_currentLine(other.m_currentLine)
+    , m_substrings(other.m_substrings)
+    , m_closed(other.m_closed)
+    , m_empty(other.m_empty)
+    , m_fastPathFlags(other.m_fastPathFlags)
+    , m_advanceFunc(other.m_advanceFunc)
+    , m_advanceAndUpdateLineNumberFunc(other.m_advanceAndUpdateLineNumberFunc)
 {
-    builder.append(string, string.length() - length, length);
+    if (m_pushedChar2)
+        m_currentChar = m_pushedChar2;
+    else if (m_pushedChar1)
+        m_currentChar = m_pushedChar1;
+    else
+        m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
 }
 
-SegmentedString& SegmentedString::operator=(SegmentedString&& other)
+SegmentedString& SegmentedString::operator=(const SegmentedString& other)
 {
-    m_currentSubstring = WTFMove(other.m_currentSubstring);
-    m_otherSubstrings = WTFMove(other.m_otherSubstrings);
-
-    m_isClosed = other.m_isClosed;
-
-    m_currentCharacter = other.m_currentCharacter;
+    m_pushedChar1 = other.m_pushedChar1;
+    m_pushedChar2 = other.m_pushedChar2;
+    m_currentString = other.m_currentString;
+    m_substrings = other.m_substrings;
+    if (m_pushedChar2)
+        m_currentChar = m_pushedChar2;
+    else if (m_pushedChar1)
+        m_currentChar = m_pushedChar1;
+    else
+        m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
 
-    m_numberOfCharactersConsumedPriorToCurrentSubstring = other.m_numberOfCharactersConsumedPriorToCurrentSubstring;
+    m_closed = other.m_closed;
+    m_empty = other.m_empty;
+    m_fastPathFlags = other.m_fastPathFlags;
+    m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString;
     m_numberOfCharactersConsumedPriorToCurrentLine = other.m_numberOfCharactersConsumedPriorToCurrentLine;
     m_currentLine = other.m_currentLine;
 
-    m_fastPathFlags = other.m_fastPathFlags;
-    m_advanceWithoutUpdatingLineNumberFunction = other.m_advanceWithoutUpdatingLineNumberFunction;
-    m_advanceAndUpdateLineNumberFunction = other.m_advanceAndUpdateLineNumberFunction;
-
-    other.clear();
+    m_advanceFunc = other.m_advanceFunc;
+    m_advanceAndUpdateLineNumberFunc = other.m_advanceAndUpdateLineNumberFunc;
 
     return *this;
 }
 
 unsigned SegmentedString::length() const
 {
-    unsigned length = m_currentSubstring.length;
-    for (auto& substring : m_otherSubstrings)
-        length += substring.length;
+    unsigned length = m_currentString.m_length;
+    if (m_pushedChar1) {
+        ++length;
+        if (m_pushedChar2)
+            ++length;
+    }
+    if (isComposite()) {
+        Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin();
+        Deque<SegmentedSubstring>::const_iterator e = m_substrings.end();
+        for (; it != e; ++it)
+            length += it->m_length;
+    }
     return length;
 }
 
 void SegmentedString::setExcludeLineNumbers()
 {
-    if (!m_currentSubstring.doNotExcludeLineNumbers)
-        return;
-    m_currentSubstring.doNotExcludeLineNumbers = false;
-    for (auto& substring : m_otherSubstrings)
-        substring.doNotExcludeLineNumbers = false;
-    updateAdvanceFunctionPointers();
+    m_currentString.setExcludeLineNumbers();
+    if (isComposite()) {
+        Deque<SegmentedSubstring>::iterator it = m_substrings.begin();
+        Deque<SegmentedSubstring>::iterator e = m_substrings.end();
+        for (; it != e; ++it)
+            it->setExcludeLineNumbers();
+    }
 }
 
 void SegmentedString::clear()
 {
-    m_currentSubstring.length = 0;
-    m_otherSubstrings.clear();
-
-    m_isClosed = false;
-
-    m_currentCharacter = 0;
-
-    m_numberOfCharactersConsumedPriorToCurrentSubstring = 0;
+    m_pushedChar1 = 0;
+    m_pushedChar2 = 0;
+    m_currentChar = 0;
+    m_currentString.clear();
+    m_numberOfCharactersConsumedPriorToCurrentString = 0;
     m_numberOfCharactersConsumedPriorToCurrentLine = 0;
     m_currentLine = 0;
+    m_substrings.clear();
+    m_closed = false;
+    m_empty = true;
+    m_fastPathFlags = NoFastPath;
+    m_advanceFunc = &SegmentedString::advanceEmpty;
+    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
+}
 
-    updateAdvanceFunctionPointersForEmptyString();
+void SegmentedString::append(const SegmentedSubstring& s)
+{
+    ASSERT(!m_closed);
+    if (!s.m_length)
+        return;
+
+    if (!m_currentString.m_length) {
+        m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
+        m_currentString = s;
+        updateAdvanceFunctionPointers();
+    } else
+        m_substrings.append(s);
+    m_empty = false;
 }
 
-inline void SegmentedString::appendSubstring(Substring&& substring)
+void SegmentedString::pushBack(const SegmentedSubstring& s)
 {
-    ASSERT(!m_isClosed);
-    if (!substring.length)
+    ASSERT(!m_pushedChar1);
+    ASSERT(!s.numberOfCharactersConsumed());
+    if (!s.m_length)
         return;
-    if (m_currentSubstring.length)
-        m_otherSubstrings.append(WTFMove(substring));
-    else {
-        m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed();
-        m_currentSubstring = WTFMove(substring);
-        m_currentCharacter = m_currentSubstring.currentCharacter();
+
+    // FIXME: We're assuming that the characters were originally consumed by
+    //        this SegmentedString.  We're also ASSERTing that s is a fresh
+    //        SegmentedSubstring.  These assumptions are sufficient for our
+    //        current use, but we might need to handle the more elaborate
+    //        cases in the future.
+    m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
+    m_numberOfCharactersConsumedPriorToCurrentString -= s.m_length;
+    if (!m_currentString.m_length) {
+        m_currentString = s;
+        updateAdvanceFunctionPointers();
+    } else {
+        // Shift our m_currentString into our list.
+        m_substrings.prepend(m_currentString);
+        m_currentString = s;
         updateAdvanceFunctionPointers();
     }
+    m_empty = false;
 }
 
-void SegmentedString::pushBack(String&& string)
+void SegmentedString::close()
 {
-    // We never create a substring for an empty string.
-    ASSERT(string.length());
-
-    // The new substring we will create won't have the doNotExcludeLineNumbers set appropriately.
-    // That was lost when the characters were consumed before pushing them back. But this does
-    // not matter, because clients never use this for newlines. Catch that with this assertion.
-    ASSERT(!string.contains('\n'));
-
-    // The characters in the string must be previously consumed characters from this segmented string.
-    ASSERT(string.length() <= numberOfCharactersConsumed());
-
-    m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed();
-    if (m_currentSubstring.length)
-        m_otherSubstrings.prepend(WTFMove(m_currentSubstring));
-    m_currentSubstring = WTFMove(string);
-    m_numberOfCharactersConsumedPriorToCurrentSubstring -= m_currentSubstring.length;
-    m_currentCharacter = m_currentSubstring.currentCharacter();
-    updateAdvanceFunctionPointers();
+    // Closing a stream twice is likely a coding mistake.
+    ASSERT(!m_closed);
+    m_closed = true;
 }
 
-void SegmentedString::close()
+void SegmentedString::append(const SegmentedString& s)
 {
-    ASSERT(!m_isClosed);
-    m_isClosed = true;
+    ASSERT(!m_closed);
+    ASSERT(!s.m_pushedChar1);
+    append(s.m_currentString);
+    if (s.isComposite()) {
+        Deque<SegmentedSubstring>::const_iterator it = s.m_substrings.begin();
+        Deque<SegmentedSubstring>::const_iterator e = s.m_substrings.end();
+        for (; it != e; ++it)
+            append(*it);
+    }
+    m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0);
 }
 
-void SegmentedString::append(const SegmentedString& string)
+void SegmentedString::pushBack(const SegmentedString& s)
 {
-    appendSubstring(Substring { string.m_currentSubstring });
-    for (auto& substring : string.m_otherSubstrings)
-        m_otherSubstrings.append(substring);
+    ASSERT(!m_pushedChar1);
+    ASSERT(!s.m_pushedChar1);
+    if (s.isComposite()) {
+        Deque<SegmentedSubstring>::const_reverse_iterator it = s.m_substrings.rbegin();
+        Deque<SegmentedSubstring>::const_reverse_iterator e = s.m_substrings.rend();
+        for (; it != e; ++it)
+            pushBack(*it);
+    }
+    pushBack(s.m_currentString);
+    m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0);
 }
 
-void SegmentedString::append(SegmentedString&& string)
+void SegmentedString::advanceSubstring()
 {
-    appendSubstring(WTFMove(string.m_currentSubstring));
-    for (auto& substring : string.m_otherSubstrings)
-        m_otherSubstrings.append(WTFMove(substring));
+    if (isComposite()) {
+        m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
+        m_currentString = m_substrings.takeFirst();
+        // If we've previously consumed some characters of the non-current
+        // string, we now account for those characters as part of the current
+        // string, not as part of "prior to current string."
+        m_numberOfCharactersConsumedPriorToCurrentString -= m_currentString.numberOfCharactersConsumed();
+        updateAdvanceFunctionPointers();
+    } else {
+        m_currentString.clear();
+        m_empty = true;
+        m_fastPathFlags = NoFastPath;
+        m_advanceFunc = &SegmentedString::advanceEmpty;
+        m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
+    }
 }
 
-void SegmentedString::append(String&& string)
+String SegmentedString::toString() const
 {
-    appendSubstring(WTFMove(string));
+    StringBuilder result;
+    if (m_pushedChar1) {
+        result.append(m_pushedChar1);
+        if (m_pushedChar2)
+            result.append(m_pushedChar2);
+    }
+    m_currentString.appendTo(result);
+    if (isComposite()) {
+        Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin();
+        Deque<SegmentedSubstring>::const_iterator e = m_substrings.end();
+        for (; it != e; ++it)
+            it->appendTo(result);
+    }
+    return result.toString();
 }
 
-void SegmentedString::append(const String& string)
+void SegmentedString::advancePastNonNewlines(unsigned count, UChar* consumedCharacters)
 {
-    appendSubstring(String { string });
+    ASSERT_WITH_SECURITY_IMPLICATION(count <= length());
+    for (unsigned i = 0; i < count; ++i) {
+        consumedCharacters[i] = currentChar();
+        advancePastNonNewline();
+    }
 }
 
-String SegmentedString::toString() const
+void SegmentedString::advance8()
 {
-    StringBuilder result;
-    m_currentSubstring.appendTo(result);
-    for (auto& substring : m_otherSubstrings)
-        substring.appendTo(result);
-    return result.toString();
+    ASSERT(!m_pushedChar1);
+    decrementAndCheckLength();
+    m_currentChar = m_currentString.incrementAndGetCurrentChar8();
+}
+
+void SegmentedString::advance16()
+{
+    ASSERT(!m_pushedChar1);
+    decrementAndCheckLength();
+    m_currentChar = m_currentString.incrementAndGetCurrentChar16();
 }
 
-void SegmentedString::advanceWithoutUpdatingLineNumber16()
+void SegmentedString::advanceAndUpdateLineNumber8()
 {
-    m_currentCharacter = *++m_currentSubstring.currentCharacter16;
+    ASSERT(!m_pushedChar1);
+    ASSERT(m_currentString.getCurrentChar() == m_currentChar);
+    if (m_currentChar == '\n') {
+        ++m_currentLine;
+        m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
+    }
     decrementAndCheckLength();
+    m_currentChar = m_currentString.incrementAndGetCurrentChar8();
 }
 
 void SegmentedString::advanceAndUpdateLineNumber16()
 {
-    ASSERT(m_currentSubstring.doNotExcludeLineNumbers);
-    processPossibleNewline();
-    m_currentCharacter = *++m_currentSubstring.currentCharacter16;
+    ASSERT(!m_pushedChar1);
+    ASSERT(m_currentString.getCurrentChar() == m_currentChar);
+    if (m_currentChar == '\n') {
+        ++m_currentLine;
+        m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
+    }
     decrementAndCheckLength();
+    m_currentChar = m_currentString.incrementAndGetCurrentChar16();
 }
 
-inline void SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber()
+void SegmentedString::advanceSlowCase()
 {
-    ASSERT(m_currentSubstring.length == 1);
-    if (m_otherSubstrings.isEmpty()) {
-        m_currentSubstring.length = 0;
-        m_currentCharacter = 0;
-        updateAdvanceFunctionPointersForEmptyString();
-        return;
+    if (m_pushedChar1) {
+        m_pushedChar1 = m_pushedChar2;
+        m_pushedChar2 = 0;
+
+        if (m_pushedChar1) {
+            m_currentChar = m_pushedChar1;
+            return;
+        }
+
+        updateAdvanceFunctionPointers();
+    } else if (m_currentString.m_length) {
+        if (--m_currentString.m_length == 0)
+            advanceSubstring();
+    } else if (!isComposite()) {
+        m_currentString.clear();
+        m_empty = true;
+        m_fastPathFlags = NoFastPath;
+        m_advanceFunc = &SegmentedString::advanceEmpty;
+        m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
     }
-    m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed();
-    m_currentSubstring = m_otherSubstrings.takeFirst();
-    // If we've previously consumed some characters of the non-current string, we now account for those
-    // characters as part of the current string, not as part of "prior to current string."
-    m_numberOfCharactersConsumedPriorToCurrentSubstring -= m_currentSubstring.numberOfCharactersConsumed();
-    m_currentCharacter = m_currentSubstring.currentCharacter();
-    updateAdvanceFunctionPointers();
+    m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
 }
 
-void SegmentedString::advancePastSingleCharacterSubstring()
+void SegmentedString::advanceAndUpdateLineNumberSlowCase()
 {
-    ASSERT(m_currentSubstring.length == 1);
-    ASSERT(m_currentSubstring.doNotExcludeLineNumbers);
-    processPossibleNewline();
-    advancePastSingleCharacterSubstringWithoutUpdatingLineNumber();
+    if (m_pushedChar1) {
+        m_pushedChar1 = m_pushedChar2;
+        m_pushedChar2 = 0;
+
+        if (m_pushedChar1) {
+            m_currentChar = m_pushedChar1;
+            return;
+        }
+
+        updateAdvanceFunctionPointers();
+    } else if (m_currentString.m_length) {
+        if (m_currentString.getCurrentChar() == '\n' && m_currentString.doNotExcludeLineNumbers()) {
+            ++m_currentLine;
+            // Plus 1 because numberOfCharactersConsumed value hasn't incremented yet; it does with m_length decrement below.
+            m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
+        }
+        if (--m_currentString.m_length == 0)
+            advanceSubstring();
+        else
+            m_currentString.incrementAndGetCurrentChar(); // Only need the ++
+    } else if (!isComposite()) {
+        m_currentString.clear();
+        m_empty = true;
+        m_fastPathFlags = NoFastPath;
+        m_advanceFunc = &SegmentedString::advanceEmpty;
+        m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
+    }
+
+    m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
 }
 
 void SegmentedString::advanceEmpty()
 {
-    ASSERT(!m_currentSubstring.length);
-    ASSERT(m_otherSubstrings.isEmpty());
-    ASSERT(!m_currentCharacter);
+    ASSERT(!m_currentString.m_length && !isComposite());
+    m_currentChar = 0;
 }
 
-void SegmentedString::updateAdvanceFunctionPointersForSingleCharacterSubstring()
+void SegmentedString::updateSlowCaseFunctionPointers()
 {
-    ASSERT(m_currentSubstring.length == 1);
     m_fastPathFlags = NoFastPath;
-    m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber;
-    if (m_currentSubstring.doNotExcludeLineNumbers)
-        m_advanceAndUpdateLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstring;
-    else
-        m_advanceAndUpdateLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber;
+    m_advanceFunc = &SegmentedString::advanceSlowCase;
+    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumberSlowCase;
 }
 
 OrdinalNumber SegmentedString::currentLine() const
@@ -236,36 +364,18 @@ void SegmentedString::setCurrentPosition(OrdinalNumber line, OrdinalNumber colum
     m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + prologLength - columnAftreProlog.zeroBasedInt();
 }
 
-SegmentedString::AdvancePastResult SegmentedString::advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase)
+SegmentedString::AdvancePastResult SegmentedString::advancePastSlowCase(const char* literal, bool caseSensitive) // Consumes strlen(literal) characters and pushes them back if they do not match literal.
 {
-    constexpr unsigned maxLength = 10;
-    ASSERT(!strchr(literal, '\n'));
-    auto length = strlen(literal);
-    ASSERT(length <= maxLength);
+    unsigned length = strlen(literal);
     if (length > this->length()) // Fewer characters buffered than the literal needs: report it without consuming anything.
         return NotEnoughCharacters;
-    UChar consumedCharacters[maxLength];
-    for (unsigned i = 0; i < length; ++i) {
-        auto character = m_currentCharacter;
-        if (characterMismatch(character, literal[i], lettersIgnoringASCIICase)) {
-            if (i)
-                pushBack(String { consumedCharacters, i });
-            return DidNotMatch;
-        }
-        advancePastNonNewline();
-        consumedCharacters[i] = character;
-    }
-    return DidMatch;
-}
-
-void SegmentedString::updateAdvanceFunctionPointersForEmptyString()
-{
-    ASSERT(!m_currentSubstring.length);
-    ASSERT(m_otherSubstrings.isEmpty());
-    ASSERT(!m_currentCharacter);
-    m_fastPathFlags = NoFastPath;
-    m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceEmpty;
-    m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceEmpty;
+    UChar* consumedCharacters;
+    String consumedString = String::createUninitialized(length, consumedCharacters); // Presumably points consumedCharacters at the new string's buffer — confirm against WTF::String.
+    advancePastNonNewlines(length, consumedCharacters); // Consume the characters, recording each one in the buffer.
+    if (consumedString.startsWith(literal, caseSensitive))
+        return DidMatch;
+    pushBack(SegmentedString(consumedString)); // Mismatch: hand the consumed characters back to the stream.
+    return DidNotMatch;
 }
 
 }
index fe7f191..0813d60 100644 (file)
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2004-2016 Apple Inc. All rights reserved.
+    Copyright (C) 2004-2008, 2015 Apple Inc. All rights reserved.
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
     Boston, MA 02110-1301, USA.
 */
 
-#pragma once
+#ifndef SegmentedString_h
+#define SegmentedString_h
 
 #include <wtf/Deque.h>
-#include <wtf/text/WTFString.h>
+#include <wtf/text/StringBuilder.h>
 
 namespace WebCore {
 
-// FIXME: This should not start with "k".
-// FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now.
-constexpr LChar kEndOfFileMarker = 0;
+class SegmentedString;
 
-class SegmentedString {
+class SegmentedSubstring {
 public:
-    SegmentedString() = default;
-    SegmentedString(String&&);
-    SegmentedString(const String&);
+    SegmentedSubstring() // Constructs an empty substring: zero length and a null character pointer.
+        : m_length(0)
+        , m_doNotExcludeLineNumbers(true) // Line numbers are not excluded unless setExcludeLineNumbers() is called.
+        , m_is8Bit(false)
+    {
+        m_data.string16Ptr = 0; // Null the character pointer (written through the 16-bit union member).
+    }
 
-    SegmentedString(SegmentedString&&) = delete;
-    SegmentedString(const SegmentedString&) = delete;
+    SegmentedSubstring(const String& str)
+        : m_length(str.length())
+        , m_doNotExcludeLineNumbers(true)
+        , m_string(str)
+    {
+        if (m_length) {
+            if (m_string.is8Bit()) {
+                m_is8Bit = true;
+                m_data.string8Ptr = m_string.characters8();
+            } else {
+                m_is8Bit = false;
+                m_data.string16Ptr = m_string.characters16();
+            }
+        } else
+            m_is8Bit = false;
+    }
 
-    SegmentedString& operator=(SegmentedString&&);
-    SegmentedString& operator=(const SegmentedString&) = default;
+    void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
+    
+    bool is8Bit() const { return m_is8Bit; } // True when the cached pointer refers to 8-bit (LChar) characters; const like the sibling accessors.
+    
+    bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
+    bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
 
-    void clear();
-    void close();
-
-    void append(SegmentedString&&);
-    void append(const SegmentedString&);
+    void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
 
-    void append(String&&);
-    void append(const String&);
+    int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
 
-    void pushBack(String&&);
+    void appendTo(StringBuilder& builder) const // Appends the not-yet-consumed part of this substring to builder.
+    {
+        int offset = m_string.length() - m_length; // Number of characters already consumed from m_string.
 
-    void setExcludeLineNumbers();
+        if (!offset) {
+            if (m_length) // Nothing consumed yet: append the whole backing string, unless it is empty.
+                builder.append(m_string);
+        } else
+            builder.append(m_string.substring(offset, m_length)); // Partly consumed: append only the remaining m_length characters.
+    }
 
-    bool isEmpty() const { return !m_currentSubstring.length; }
-    unsigned length() const;
+    UChar getCurrentChar8() // Reads the current character through the 8-bit pointer; no null check, unlike getCurrentChar16().
+    {
+        return *m_data.string8Ptr;
+    }
 
-    bool isClosed() const { return m_isClosed; }
+    UChar getCurrentChar16() // Reads the current character through the 16-bit pointer.
+    {
+        return m_data.string16Ptr ? *m_data.string16Ptr : 0; // A null pointer (default-constructed or cleared substring) yields 0.
+    }
 
-    void advance();
-    void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline.
-    void advancePastNewline(); // Faster than calling advance when we know the current character is a newline.
+    UChar incrementAndGetCurrentChar8() // Steps the 8-bit pointer forward one character and returns the character now under it.
+    {
+        ASSERT(m_data.string8Ptr);
+        return *++m_data.string8Ptr; // Does not touch m_length; callers adjust the remaining length themselves.
+    }
 
-    enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters };
-    template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); }
-    template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); }
+    UChar incrementAndGetCurrentChar16() // Steps the 16-bit pointer forward one character and returns the character now under it.
+    {
+        ASSERT(m_data.string16Ptr);
+        return *++m_data.string16Ptr; // Does not touch m_length; callers adjust the remaining length themselves.
+    }
 
-    unsigned numberOfCharactersConsumed() const;
+    String currentSubString(unsigned length) // Returns a substring of the backing string that begins at the current read position.
+    {
+        int offset = m_string.length() - m_length; // Characters already consumed, i.e. the index of the current character.
+        return m_string.substring(offset, length); // NOTE(review): no check here that length <= m_length; the visible caller (advancePast) checks first.
+    }
 
-    String toString() const;
+    ALWAYS_INLINE UChar getCurrentChar() // Returns the current character, dispatching on the substring's character width.
+    {
+        ASSERT(m_length); // Callers are expected to have at least one character remaining.
+        if (is8Bit())
+            return getCurrentChar8();
+        return getCurrentChar16();
+    }
+    
+    ALWAYS_INLINE UChar incrementAndGetCurrentChar() // Steps to the next character and returns it, dispatching on character width.
+    {
+        ASSERT(m_length); // Callers are expected to have at least one character remaining.
+        if (is8Bit())
+            return incrementAndGetCurrentChar8();
+        return incrementAndGetCurrentChar16();
+    }
 
-    UChar currentCharacter() const { return m_currentCharacter; }
+public:
+    union {
+        const LChar* string8Ptr;
+        const UChar* string16Ptr;
+    } m_data;
+    int m_length;
 
-    OrdinalNumber currentColumn() const;
-    OrdinalNumber currentLine() const;
+private:
+    bool m_doNotExcludeLineNumbers;
+    bool m_is8Bit;
+    String m_string;
+};
 
-    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog
-    // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
-    void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength);
+class SegmentedString {
+public:
+    SegmentedString() // Constructs an empty, open stream positioned at line 0.
+        : m_pushedChar1(0)
+        , m_pushedChar2(0)
+        , m_currentChar(0)
+        , m_numberOfCharactersConsumedPriorToCurrentString(0)
+        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
+        , m_currentLine(0)
+        , m_closed(false)
+        , m_empty(true)
+        , m_fastPathFlags(NoFastPath)
+        , m_advanceFunc(&SegmentedString::advanceEmpty) // Nothing to consume yet, so both advance entry points use the empty handler.
+        , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
+    {
+    }
 
-private:
-    struct Substring {
-        Substring() = default;
-        Substring(String&&);
-
-        UChar currentCharacter() const;
-        UChar currentCharacterPreIncrement();
-
-        unsigned numberOfCharactersConsumed() const;
-        void appendTo(StringBuilder&) const;
-
-        String string;
-        unsigned length { 0 };
-        bool is8Bit;
-        union {
-            const LChar* currentCharacter8;
-            const UChar* currentCharacter16;
-        };
-        bool doNotExcludeLineNumbers { true };
-    };
+    SegmentedString(const String& str) // Constructs an open stream whose current substring wraps str.
+        : m_pushedChar1(0)
+        , m_pushedChar2(0)
+        , m_currentString(str)
+        , m_currentChar(0)
+        , m_numberOfCharactersConsumedPriorToCurrentString(0)
+        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
+        , m_currentLine(0)
+        , m_closed(false)
+        , m_empty(!str.length())
+        , m_fastPathFlags(NoFastPath)
+    {
+        if (m_currentString.m_length)
+            m_currentChar = m_currentString.getCurrentChar(); // Prime the cached current character when there is one.
+        updateAdvanceFunctionPointers(); // Assigns both advance function pointers, which the initializer list leaves unset.
+    }
 
-    enum FastPathFlags {
-        NoFastPath = 0,
-        Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
-        Use8BitAdvance = 1 << 1,
-    };
+    SegmentedString(const SegmentedString&);
+    SegmentedString& operator=(const SegmentedString&);
 
-    void appendSubstring(Substring&&);
+    void clear();
+    void close();
 
-    void processPossibleNewline();
-    void startNewLine();
+    void append(const SegmentedString&);
+    void pushBack(const SegmentedString&);
 
-    void advanceWithoutUpdatingLineNumber();
-    void advanceWithoutUpdatingLineNumber16();
-    void advanceAndUpdateLineNumber16();
-    void advancePastSingleCharacterSubstringWithoutUpdatingLineNumber();
-    void advancePastSingleCharacterSubstring();
-    void advanceEmpty();
+    void setExcludeLineNumbers();
 
-    void updateAdvanceFunctionPointers();
-    void updateAdvanceFunctionPointersForEmptyString();
-    void updateAdvanceFunctionPointersForSingleCharacterSubstring();
+    void push(UChar c) // Pushes a character back onto the stream; at most two pushed characters are held.
+    {
+        if (!m_pushedChar1) {
+            m_pushedChar1 = c;
+            m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar(); // A pushed 0 leaves the slot looking empty, so fall back to the substring's character.
+            updateSlowCaseFunctionPointers(); // Clears the fast-path flags and routes advancing through the slow-case handlers.
+        } else {
+            ASSERT(!m_pushedChar2); // Only two slots exist; a third push would overwrite the second.
+            m_pushedChar2 = c;
+        }
+    }
 
-    void decrementAndCheckLength();
+    bool isEmpty() const { return m_empty; }
+    unsigned length() const;
 
-    template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase);
-    template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]);
-    AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase);
+    bool isClosed() const { return m_closed; }
 
-    Substring m_currentSubstring;
-    Deque<Substring> m_otherSubstrings;
+    enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters };
+    template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast(literal, length - 1, true); }
+    template<unsigned length> AdvancePastResult advancePastIgnoringCase(const char (&literal)[length]) { return advancePast(literal, length - 1, false); }
 
-    bool m_isClosed { false };
+    void advance() // Advances to the next character; the inline 8-bit path does no line-number bookkeeping.
+    {
+        if (m_fastPathFlags & Use8BitAdvance) {
+            ASSERT(!m_pushedChar1);
+            bool haveOneCharacterLeft = (--m_currentString.m_length == 1); // True when exactly one character remains after this step.
+            m_currentChar = m_currentString.incrementAndGetCurrentChar8();
 
-    UChar m_currentCharacter { 0 };
+            if (!haveOneCharacterLeft)
+                return;
 
-    unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 };
-    unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 };
-    int m_currentLine { 0 };
+            updateSlowCaseFunctionPointers(); // One character left: the next advance must go through the slow-case handlers.
 
-    unsigned char m_fastPathFlags { NoFastPath };
-    void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty };
-    void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty };
-};
+            return;
+        }
 
-inline SegmentedString::Substring::Substring(String&& passedString)
-    : string(WTFMove(passedString))
-    , length(string.length())
-{
-    if (length) {
-        is8Bit = string.impl()->is8Bit();
-        if (is8Bit)
-            currentCharacter8 = string.impl()->characters8();
-        else
-            currentCharacter16 = string.impl()->characters16();
+        (this->*m_advanceFunc)(); // Fast path unavailable: dispatch to the currently installed advance handler.
     }
-}
 
-inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const
-{
-    return string.length() - length;
-}
+    void advanceAndUpdateLineNumber() // Advances to the next character, starting a new line when stepping past '\n'.
+    {
+        if (m_fastPathFlags & Use8BitAdvance) {
+            ASSERT(!m_pushedChar1);
 
-ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const
-{
-    ASSERT(length);
-    return is8Bit ? *currentCharacter8 : *currentCharacter16;
-}
+            bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers); // The character being left is a newline and line counting is enabled.
+            bool haveOneCharacterLeft = (--m_currentString.m_length == 1); // True when exactly one character remains after this step.
 
-ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement()
-{
-    ASSERT(length);
-    return is8Bit ? *++currentCharacter8 : *++currentCharacter16;
-}
+            m_currentChar = m_currentString.incrementAndGetCurrentChar8();
 
-inline SegmentedString::SegmentedString(String&& string)
-    : m_currentSubstring(WTFMove(string))
-{
-    if (m_currentSubstring.length) {
-        m_currentCharacter = m_currentSubstring.currentCharacter();
-        updateAdvanceFunctionPointers();
+            if (!(haveNewLine | haveOneCharacterLeft)) // Common case: neither condition holds, so nothing more to do.
+                return;
+
+            if (haveNewLine) {
+                ++m_currentLine;
+                m_numberOfCharactersConsumedPriorToCurrentLine =  m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed(); // m_length is already decremented, so this count includes the newline.
+            }
+
+            if (haveOneCharacterLeft)
+                updateSlowCaseFunctionPointers(); // One character left: the next advance must go through the slow-case handlers.
+
+            return;
+        }
+
+        (this->*m_advanceAndUpdateLineNumberFunc)(); // Fast path unavailable: dispatch to the currently installed handler.
     }
-}
 
-inline SegmentedString::SegmentedString(const String& string)
-    : SegmentedString(String { string })
-{
-}
+    void advancePastNonNewline() // Advance for callers that know the current character is not '\n'.
+    {
+        ASSERT(currentChar() != '\n');
+        advance();
+    }
 
-ALWAYS_INLINE void SegmentedString::decrementAndCheckLength()
-{
-    ASSERT(m_currentSubstring.length > 1);
-    if (UNLIKELY(--m_currentSubstring.length == 1))
-        updateAdvanceFunctionPointersForSingleCharacterSubstring();
-}
+    void advancePastNewlineAndUpdateLineNumber() // Advance for callers that know the current character is '\n'.
+    {
+        ASSERT(currentChar() == '\n');
+        if (!m_pushedChar1 && m_currentString.m_length > 1) { // Inline path: no pushed-back character and more than one character left.
+            int newLineFlag = m_currentString.doNotExcludeLineNumbers(); // 1 when this substring counts toward line numbers, otherwise 0.
+            m_currentLine += newLineFlag;
+            if (newLineFlag)
+                m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; // +1 because the newline is not counted as consumed until m_length is decremented below.
+            decrementAndCheckLength();
+            m_currentChar = m_currentString.incrementAndGetCurrentChar();
+            return;
+        }
+        advanceAndUpdateLineNumberSlowCase();
+    }
 
-ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber()
-{
-    if (LIKELY(m_fastPathFlags & Use8BitAdvance)) {
-        m_currentCharacter = *++m_currentSubstring.currentCharacter8;
-        decrementAndCheckLength();
-        return;
+    int numberOfCharactersConsumed() const // Characters consumed so far, with pending pushed-back characters subtracted.
+    {
+        int numberOfPushedCharacters = 0;
+        if (m_pushedChar1) {
+            ++numberOfPushedCharacters;
+            if (m_pushedChar2) // The second slot is only counted when the first is occupied.
+                ++numberOfPushedCharacters;
+        }
+        return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
     }
 
-    (this->*m_advanceWithoutUpdatingLineNumberFunction)();
-}
+    String toString() const;
 
-inline void SegmentedString::startNewLine()
-{
-    ++m_currentLine;
-    m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed();
-}
+    UChar currentChar() const { return m_currentChar; }    
 
-inline void SegmentedString::processPossibleNewline()
-{
-    if (m_currentCharacter == '\n')
-        startNewLine();
-}
+    OrdinalNumber currentColumn() const;
+    OrdinalNumber currentLine() const;
 
-inline void SegmentedString::advance()
-{
-    if (LIKELY(m_fastPathFlags & Use8BitAdvance)) {
-        ASSERT(m_currentSubstring.length > 1);
-        bool lastCharacterWasNewline = m_currentCharacter == '\n';
-        m_currentCharacter = *++m_currentSubstring.currentCharacter8;
-        bool haveOneCharacterLeft = --m_currentSubstring.length == 1;
-        if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft)))
-            return;
-        if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers))
-            startNewLine();
-        if (haveOneCharacterLeft)
-            updateAdvanceFunctionPointersForSingleCharacterSubstring();
-        return;
-    }
+    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog
+    // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
+    void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength);
 
-    (this->*m_advanceAndUpdateLineNumberFunction)();
-}
+private:
+    enum FastPathFlags {
+        NoFastPath = 0,
+        Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
+        Use8BitAdvance = 1 << 1,
+    };
 
-ALWAYS_INLINE void SegmentedString::advancePastNonNewline()
-{
-    ASSERT(m_currentCharacter != '\n');
-    advanceWithoutUpdatingLineNumber();
-}
+    void append(const SegmentedSubstring&);
+    void pushBack(const SegmentedSubstring&);
 
-inline void SegmentedString::advancePastNewline()
-{
-    ASSERT(m_currentCharacter == '\n');
-    if (m_currentSubstring.length > 1) {
-        if (m_currentSubstring.doNotExcludeLineNumbers)
-            startNewLine();
-        m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement();
-        decrementAndCheckLength();
-        return;
+    void advance8();
+    void advance16();
+    void advanceAndUpdateLineNumber8();
+    void advanceAndUpdateLineNumber16();
+    void advanceSlowCase();
+    void advanceAndUpdateLineNumberSlowCase();
+    void advanceEmpty();
+    void advanceSubstring();
+    
+    void updateSlowCaseFunctionPointers();
+
+    void decrementAndCheckLength() // Consumes one character of remaining length and drops to the slow case when one is left.
+    {
+        ASSERT(m_currentString.m_length > 1); // Callers must not use this on the final character of a substring.
+        if (--m_currentString.m_length == 1)
+            updateSlowCaseFunctionPointers();
     }
 
-    (this->*m_advanceAndUpdateLineNumberFunction)();
-}
+    void updateAdvanceFunctionPointers() // Selects the advance handlers and fast-path flags that fit the current substring.
+    {
+        if ((m_currentString.m_length > 1) && !m_pushedChar1) { // Width-specific handlers need more than one character left and no pushed-back character.
+            if (m_currentString.is8Bit()) {
+                m_advanceFunc = &SegmentedString::advance8;
+                m_fastPathFlags = Use8BitAdvance;
+                if (m_currentString.doNotExcludeLineNumbers()) {
+                    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
+                    m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
+                } else
+                    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8; // Line numbers excluded: the plain advance serves both entry points.
+                return;
+            }
 
-inline unsigned SegmentedString::numberOfCharactersConsumed() const
-{
-    return m_numberOfCharactersConsumedPriorToCurrentSubstring + m_currentSubstring.numberOfCharactersConsumed();
-}
+            m_advanceFunc = &SegmentedString::advance16;
+            m_fastPathFlags = NoFastPath; // No inline fast path for 16-bit data; advancing dispatches through the function pointers.
+            if (m_currentString.doNotExcludeLineNumbers())
+                m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
+            else
+                m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
+            return;
+        }
+
+        if (!m_currentString.m_length && !isComposite()) {
+            m_advanceFunc = &SegmentedString::advanceEmpty;
+            m_fastPathFlags = NoFastPath;
+            m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
+        } // NOTE(review): no return here, so the call below replaces these handlers with the slow-case ones — confirm this is intended.
+
+        updateSlowCaseFunctionPointers();
+    }
+
+    // Writes consumed characters into consumedCharacters, which must have space for at least |count| characters.
+    void advancePastNonNewlines(unsigned count);
+    void advancePastNonNewlines(unsigned count, UChar* consumedCharacters);
+
+    AdvancePastResult advancePast(const char* literal, unsigned length, bool caseSensitive);
+    AdvancePastResult advancePastSlowCase(const char* literal, bool caseSensitive);
+
+    bool isComposite() const { return !m_substrings.isEmpty(); }
+
+    UChar m_pushedChar1;
+    UChar m_pushedChar2;
+    SegmentedSubstring m_currentString;
+    UChar m_currentChar;
+    int m_numberOfCharactersConsumedPriorToCurrentString;
+    int m_numberOfCharactersConsumedPriorToCurrentLine;
+    int m_currentLine;
+    Deque<SegmentedSubstring> m_substrings;
+    bool m_closed;
+    bool m_empty;
+    unsigned char m_fastPathFlags;
+    void (SegmentedString::*m_advanceFunc)();
+    void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
+};
 
-template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase)
+inline void SegmentedString::advancePastNonNewlines(unsigned count) // Advances past count characters, none of which may be '\n'.
 {
-    return lettersIgnoringASCIICase ? !isASCIIAlphaCaselessEqual(a, b) : a != b;
+    for (unsigned i = 0; i < count; ++i)
+        advancePastNonNewline(); // Asserts that each character stepped over is not a newline.
 }
 
-template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator])
+inline SegmentedString::AdvancePastResult SegmentedString::advancePast(const char* literal, unsigned length, bool caseSensitive) // Tries to consume literal (length characters, no newlines) from the front of the stream.
 {
-    constexpr unsigned length = lengthIncludingTerminator - 1;
-    ASSERT(!literal[length]);
+    ASSERT(strlen(literal) == length);
     ASSERT(!strchr(literal, '\n'));
-    if (length + 1 < m_currentSubstring.length) {
-        if (m_currentSubstring.is8Bit) {
-            for (unsigned i = 0; i < length; ++i) {
-                if (characterMismatch(m_currentSubstring.currentCharacter8[i], literal[i], lettersIgnoringASCIICase))
-                    return DidNotMatch;
-            }
-            m_currentSubstring.currentCharacter8 += length;
-            m_currentCharacter = *m_currentSubstring.currentCharacter8;
-        } else {
-            for (unsigned i = 0; i < length; ++i) {
-                if (characterMismatch(m_currentSubstring.currentCharacter16[i], literal[i], lettersIgnoringASCIICase))
-                    return DidNotMatch;
-            }
-            m_currentSubstring.currentCharacter16 += length;
-            m_currentCharacter = *m_currentSubstring.currentCharacter16;
+    if (!m_pushedChar1) { // Direct comparison is only attempted when no pushed-back character is pending.
+        if (length <= static_cast<unsigned>(m_currentString.m_length)) { // The whole literal fits inside the current substring.
+            if (!m_currentString.currentSubString(length).startsWith(literal, caseSensitive))
+                return DidNotMatch; // Nothing has been consumed at this point.
+            advancePastNonNewlines(length);
+            return DidMatch;
         }
-        m_currentSubstring.length -= length;
-        return DidMatch;
     }
-    return advancePastSlowCase(literal, lettersIgnoringASCIICase);
+    return advancePastSlowCase(literal, caseSensitive); // Pushed-back character pending, or the literal does not fit in the current substring.
 }
 
-inline void SegmentedString::updateAdvanceFunctionPointers()
-{
-    if (m_currentSubstring.length > 1) {
-        if (m_currentSubstring.is8Bit) {
-            m_fastPathFlags = Use8BitAdvance;
-            if (m_currentSubstring.doNotExcludeLineNumbers)
-                m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
-            return;
-        }
-        m_fastPathFlags = NoFastPath;
-        m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16;
-        if (m_currentSubstring.doNotExcludeLineNumbers)
-            m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceAndUpdateLineNumber16;
-        else
-            m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16;
-        return;
-    }
-
-    if (!m_currentSubstring.length) {
-        updateAdvanceFunctionPointersForEmptyString();
-        return;
-    }
-
-    updateAdvanceFunctionPointersForSingleCharacterSubstring();
 }
 
-}
+#endif
index 7686268..3b3626a 100644 (file)
@@ -30,9 +30,9 @@
 
 namespace WebCore {
 
-inline void unconsumeCharacters(SegmentedString& source, StringBuilder& consumedCharacters)
+inline void unconsumeCharacters(SegmentedString& source, const StringBuilder& consumedCharacters) // Pushes the characters accumulated in consumedCharacters back onto source.
 {
-    source.pushBack(consumedCharacters.toString());
+    source.pushBack(SegmentedString(consumedCharacters.toStringPreserveCapacity())); // Wrapped in a SegmentedString because pushBack takes one here; the builder itself is const.
 }
 
 template <typename ParserFunctions>
@@ -56,7 +56,7 @@ bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCh
     StringBuilder consumedCharacters;
     
     while (!source.isEmpty()) {
-        UChar character = source.currentCharacter();
+        UChar character = source.currentChar();
         switch (state) {
         case Initial:
             if (character == '\x09' || character == '\x0A' || character == '\x0C' || character == ' ' || character == '<' || character == '&')
@@ -85,21 +85,21 @@ bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCh
                 state = Decimal;
                 goto Decimal;
             }
-            source.pushBack(ASCIILiteral("#"));
+            source.pushBack(SegmentedString(ASCIILiteral("#")));
             return false;
         case MaybeHexLowerCaseX:
             if (isASCIIHexDigit(character)) {
                 state = Hex;
                 goto Hex;
             }
-            source.pushBack(ASCIILiteral("#x"));
+            source.pushBack(SegmentedString(ASCIILiteral("#x")));
             return false;
         case MaybeHexUpperCaseX:
             if (isASCIIHexDigit(character)) {
                 state = Hex;
                 goto Hex;
             }
-            source.pushBack(ASCIILiteral("#X"));
+            source.pushBack(SegmentedString(ASCIILiteral("#X")));
             return false;
         case Hex:
         Hex:
@@ -110,7 +110,7 @@ bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCh
                 break;
             }
             if (character == ';') {
-                source.advancePastNonNewline();
+                source.advance();
                 decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
                 return true;
             }
@@ -129,7 +129,7 @@ bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCh
                 break;
             }
             if (character == ';') {
-                source.advancePastNonNewline();
+                source.advance();
                 decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
                 return true;
             }
@@ -144,7 +144,7 @@ bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCh
             return ParserFunctions::consumeNamedEntity(source, decodedCharacter, notEnoughCharacters, additionalAllowedCharacter, character);
         }
         consumedCharacters.append(character);
-        source.advancePastNonNewline();
+        source.advance();
     }
     ASSERT(source.isEmpty());
     notEnoughCharacters = true;
index 58ef106..f2a2f26 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008-2016 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2008, 2015 Apple Inc. All Rights Reserved.
  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
  *
@@ -27,6 +27,8 @@
 
 #pragma once
 
+#include "SegmentedString.h"
+
 #if COMPILER(MSVC)
 // Disable the "unreachable code" warning so we can compile the ASSERT_NOT_REACHED in the END_STATE macro.
 #pragma warning(disable: 4702)
@@ -42,7 +44,7 @@ inline bool isTokenizerWhitespace(UChar character)
 #define BEGIN_STATE(stateName)                                  \
     case stateName:                                             \
     stateName: {                                                \
-        constexpr auto currentState = stateName;                \
+        const auto currentState = stateName;                    \
         UNUSED_PARAM(currentState);
 
 #define END_STATE()                                             \
@@ -72,15 +74,6 @@ inline bool isTokenizerWhitespace(UChar character)
         character = m_preprocessor.nextInputCharacter();        \
         goto newState;                                          \
     } while (false)
-#define ADVANCE_PAST_NON_NEWLINE_TO(newState)                   \
-    do {                                                        \
-        if (!m_preprocessor.advancePastNonNewline(source, isNullCharacterSkippingState(newState))) { \
-            m_state = newState;                                 \
-            return haveBufferedCharacterToken();                \
-        }                                                       \
-        character = m_preprocessor.nextInputCharacter();        \
-        goto newState;                                          \
-    } while (false)
 
 // For more complex cases, caller consumes the characters first and then uses this macro.
 #define SWITCH_TO(newState)                                     \
index 92605fc..ce20104 100644 (file)
@@ -100,15 +100,14 @@ void XMLDocumentParser::clearCurrentNodeStack()
     }
 }
 
-void XMLDocumentParser::insert(SegmentedString&&)
+void XMLDocumentParser::insert(const SegmentedString&) // Not expected to be called for this parser; the argument is ignored.
 {
     ASSERT_NOT_REACHED(); // Reaching this in an assertion-enabled build signals a caller bug.
 }
 
 void XMLDocumentParser::append(RefPtr<StringImpl>&& inputSource)
 {
-    String source { WTFMove(inputSource) };
-
+    SegmentedString source(WTFMove(inputSource));
     if (m_sawXSLTransform || !m_sawFirstElement)
         m_originalSourceForTransform.append(source);
 
@@ -120,7 +119,7 @@ void XMLDocumentParser::append(RefPtr<StringImpl>&& inputSource)
         return;
     }
 
-    doWrite(source);
+    doWrite(source.toString());
 
     // After parsing, dispatch image beforeload events.
     ImageLoader::dispatchPendingBeforeLoadEvents();
@@ -153,6 +152,7 @@ static inline String toString(const xmlChar* string, size_t size)
     return String::fromUTF8(reinterpret_cast<const char*>(string), size); 
 }
 
+
 bool XMLDocumentParser::updateLeafTextNode()
 {
     if (isStopped())
index 20bce4a..c691768 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2000 Peter Kelly (pmk@post.com)
- * Copyright (C) 2005-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
  * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
 #include "ScriptableDocumentParser.h"
 #include "SegmentedString.h"
 #include "XMLErrors.h"
-#include <libxml/tree.h>
-#include <libxml/xmlstring.h>
 #include <wtf/HashMap.h>
 #include <wtf/text/AtomicStringHash.h>
 #include <wtf/text/CString.h>
 
+#include <libxml/tree.h>
+#include <libxml/xmlstring.h>
+
 namespace WebCore {
 
 class ContainerNode;
 class CachedResourceLoader;
 class DocumentFragment;
+class Document;
 class Element;
 class FrameView;
 class PendingCallbacks;
+class PendingScript;
 class Text;
 
-class XMLParserContext : public RefCounted<XMLParserContext> {
-public:
-    static RefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void* userData, const CString& chunk);
-    static Ref<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void* userData);
-    ~XMLParserContext();
-    xmlParserCtxtPtr context() const { return m_context; }
-
-private:
-    XMLParserContext(xmlParserCtxtPtr context)
-        : m_context(context)
-    {
-    }
-    xmlParserCtxtPtr m_context;
-};
-
-class XMLDocumentParser final : public ScriptableDocumentParser, public PendingScriptClient {
-    WTF_MAKE_FAST_ALLOCATED;
-public:
-    static Ref<XMLDocumentParser> create(Document& document, FrameView* view)
-    {
-        return adoptRef(*new XMLDocumentParser(document, view));
-    }
-    static Ref<XMLDocumentParser> create(DocumentFragment& fragment, Element* element, ParserContentPolicy parserContentPolicy)
-    {
-        return adoptRef(*new XMLDocumentParser(fragment, element, parserContentPolicy));
-    }
-
-    ~XMLDocumentParser();
-
-    // Exposed for callbacks:
-    void handleError(XMLErrors::ErrorType, const char* message, TextPosition);
-
-    void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
-    bool isXHTMLDocument() const { return m_isXHTMLDocument; }
+    class XMLParserContext : public RefCounted<XMLParserContext> {
+    public:
+        static RefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void* userData, const CString& chunk);
+        static Ref<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void* userData);
+        ~XMLParserContext();
+        xmlParserCtxtPtr context() const { return m_context; }
 
-    static bool parseDocumentFragment(const String&, DocumentFragment&, Element* parent = nullptr, ParserContentPolicy = AllowScriptingContent);
+    private:
+        XMLParserContext(xmlParserCtxtPtr context)
+            : m_context(context)
+        {
+        }
+        xmlParserCtxtPtr m_context;
+    };
 
-    // Used by XMLHttpRequest to check if the responseXML was well formed.
-    bool wellFormed() const final { return !m_sawError; }
+    class XMLDocumentParser final : public ScriptableDocumentParser, public PendingScriptClient {
+        WTF_MAKE_FAST_ALLOCATED;
+    public:
+        static Ref<XMLDocumentParser> create(Document& document, FrameView* view)
+        {
+            return adoptRef(*new XMLDocumentParser(document, view));
+        }
+        static Ref<XMLDocumentParser> create(DocumentFragment& fragment, Element* element, ParserContentPolicy parserContentPolicy)
+        {
+            return adoptRef(*new XMLDocumentParser(fragment, element, parserContentPolicy));
+        }
+
+        ~XMLDocumentParser();
 
-    static bool supportsXMLVersion(const String&);
+        // Exposed for callbacks:
+        void handleError(XMLErrors::ErrorType, const char* message, TextPosition);
 
-private:
-    explicit XMLDocumentParser(Document&, FrameView* = nullptr);
-    XMLDocumentParser(DocumentFragment&, Element*, ParserContentPolicy);
+        void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
+        bool isXHTMLDocument() const { return m_isXHTMLDocument; }
 
-    void insert(SegmentedString&&) final;
-    void append(RefPtr<StringImpl>&&) final;
-    void finish() final;
-    bool isWaitingForScripts() const final;
-    void stopParsing() final;
-    void detach() final;
+        static bool parseDocumentFragment(const String&, DocumentFragment&, Element* parent = nullptr, ParserContentPolicy = AllowScriptingContent);
 
-    TextPosition textPosition() const final;
-    bool shouldAssociateConsoleMessagesWithTextPosition() const final;
+        // Used by the XMLHttpRequest to check if the responseXML was well formed.
+        bool wellFormed() const override { return !m_sawError; }
 
-    void notifyFinished(PendingScript&) final;
+        static bool supportsXMLVersion(const String&);
 
-    void end();
+    private:
+        XMLDocumentParser(Document&, FrameView* = nullptr);
+        XMLDocumentParser(DocumentFragment&, Element*, ParserContentPolicy);
 
-    void pauseParsing();
-    void resumeParsing();
+        // From DocumentParser
+        void insert(const SegmentedString&) override;
+        void append(RefPtr<StringImpl>&&) override;
+        void finish() override;
+        bool isWaitingForScripts() const override;
+        void stopParsing() override;
+        void detach() override;
 
-    bool appendFragmentSource(const String&);
+        TextPosition textPosition() const override;
+        bool shouldAssociateConsoleMessagesWithTextPosition() const override;
 
-public:
-    // Callbacks from parser SAX, and other functions needed inside
-    // the parser implementation, but outside this class.
+        void notifyFinished(PendingScript&) final;
 
-    void error(XMLErrors::ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0);
-    void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI,
-        int numNamespaces, const xmlChar** namespaces,
-        int numAttributes, int numDefaulted, const xmlChar** libxmlAttributes);
-    void endElementNs();
-    void characters(const xmlChar*, int length);
-    void processingInstruction(const xmlChar* target, const xmlChar* data);
-    void cdataBlock(const xmlChar*, int length);
-    void comment(const xmlChar*);
-    void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone);
-    void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID);
-    void endDocument();
+        void end();
 
-    bool isParsingEntityDeclaration() const { return m_isParsingEntityDeclaration; }
-    void setIsParsingEntityDeclaration(bool value) { m_isParsingEntityDeclaration = value; }
+        void pauseParsing();
+        void resumeParsing();
 
-    int depthTriggeringEntityExpansion() const { return m_depthTriggeringEntityExpansion; }
-    void setDepthTriggeringEntityExpansion(int depth) { m_depthTriggeringEntityExpansion = depth; }
+        bool appendFragmentSource(const String&);
 
-private:
-    void initializeParserContext(const CString& chunk = CString());
+    public:
+        // callbacks from parser SAX
+        void error(XMLErrors::ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0);
+        void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
+                            const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes);
+        void endElementNs();
+        void characters(const xmlChar* s, int len);
+        void processingInstruction(const xmlChar* target, const xmlChar* data);
+        void cdataBlock(const xmlChar* s, int len);
+        void comment(const xmlChar* s);
+        void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone);
+        void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID);
+        void endDocument();
 
-    void pushCurrentNode(ContainerNode*);
-    void popCurrentNode();
-    void clearCurrentNodeStack();
+        bool isParsingEntityDeclaration() const { return m_isParsingEntityDeclaration; }
+        void setIsParsingEntityDeclaration(bool value) { m_isParsingEntityDeclaration = value; }
 
-    void insertErrorMessageBlock();
+        int depthTriggeringEntityExpansion() const { return m_depthTriggeringEntityExpansion; }
+        void setDepthTriggeringEntityExpansion(int depth) { m_depthTriggeringEntityExpansion = depth; }
 
-    void createLeafTextNode();
-    bool updateLeafTextNode();
+    private:
+        void initializeParserContext(const CString& chunk = CString());
 
-    void doWrite(const String&);
-    void doEnd();
+        void pushCurrentNode(ContainerNode*);
+        void popCurrentNode();
+        void clearCurrentNodeStack();
 
-    xmlParserCtxtPtr context() const { return m_context ? m_context->context() : nullptr; };
+        void insertErrorMessageBlock();
 
-    FrameView* m_view { nullptr };
+        void createLeafTextNode();
+        bool updateLeafTextNode();
 
-    SegmentedString m_originalSourceForTransform;
+        void doWrite(const String&);
+        void doEnd();
+
+        FrameView* m_view;
+
+        SegmentedString m_originalSourceForTransform;
 
-    RefPtr<XMLParserContext> m_context;
-    std::unique_ptr<PendingCallbacks> m_pendingCallbacks;
-    Vector<xmlChar> m_bufferedText;
-    int m_depthTriggeringEntityExpansion { -1 };
-    bool m_isParsingEntityDeclaration { false };
+        xmlParserCtxtPtr context() const { return m_context ? m_context->context() : nullptr; };
+        RefPtr<XMLParserContext> m_context;
+        std::unique_ptr<PendingCallbacks> m_pendingCallbacks;
+        Vector<xmlChar> m_bufferedText;
+        int m_depthTriggeringEntityExpansion;
+        bool m_isParsingEntityDeclaration;
 
-    ContainerNode* m_currentNode { nullptr };
-    Vector<ContainerNode*> m_currentNodeStack;
+        ContainerNode* m_currentNode;
+        Vector<ContainerNode*> m_currentNodeStack;
 
-    RefPtr<Text> m_leafTextNode;
+        RefPtr<Text> m_leafTextNode;
 
-    bool m_sawError { false };
-    bool m_sawCSS { false };
-    bool m_sawXSLTransform { false };
-    bool m_sawFirstElement { false };
-    bool m_isXHTMLDocument { false };
-    bool m_parserPaused { false };
-    bool m_requestingScript { false };
-    bool m_finishCalled { false };
+        bool m_sawError;
+        bool m_sawCSS;
+        bool m_sawXSLTransform;
+        bool m_sawFirstElement;
+        bool m_isXHTMLDocument;
+        bool m_parserPaused;
+        bool m_requestingScript;
+        bool m_finishCalled;
 
-    std::unique_ptr<XMLErrors> m_xmlErrors;
+        std::unique_ptr<XMLErrors> m_xmlErrors;
 
-    RefPtr<PendingScript> m_pendingScript;
-    TextPosition m_scriptStartPosition;
+        RefPtr<PendingScript> m_pendingScript;
+        TextPosition m_scriptStartPosition;
 
-    bool m_parsingFragment { false };
-    AtomicString m_defaultNamespaceURI;
+        bool m_parsingFragment;
+        AtomicString m_defaultNamespaceURI;
 
-    HashMap<AtomicString, AtomicString> m_prefixToNamespaceMap;
-    SegmentedString m_pendingSrc;
-};
+        typedef HashMap<AtomicString, AtomicString> PrefixForNamespaceMap;
+        PrefixForNamespaceMap m_prefixToNamespaceMap;
+        SegmentedString m_pendingSrc;
+    };
 
 #if ENABLE(XSLT)
 void* xmlDocPtrForString(CachedResourceLoader&, const String& source, const String& url);
index 2e7df2d..9f4af7a 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2000 Peter Kelly <pmk@post.com>
- * Copyright (C) 2005-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
  * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
  * Copyright (C) 2007 Samuel Weinig <sam@webkit.org>
  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
 #include "DocumentFragment.h"
 #include "DocumentType.h"
 #include "Frame.h"
+#include "FrameLoader.h"
+#include "FrameView.h"
 #include "HTMLEntityParser.h"
 #include "HTMLHtmlElement.h"
+#include "HTMLLinkElement.h"
+#include "HTMLNames.h"
+#include "HTMLStyleElement.h"
 #include "HTMLTemplateElement.h"
+#include "LoadableClassicScript.h"
 #include "Page.h"
 #include "PendingScript.h"
 #include "ProcessingInstruction.h"
 #include "ResourceError.h"
+#include "ResourceRequest.h"
 #include "ResourceResponse.h"
 #include "ScriptElement.h"
 #include "ScriptSourceCode.h"
+#include "SecurityOrigin.h"
 #include "Settings.h"
 #include "StyleScope.h"
+#include "TextResourceDecoder.h"
 #include "TransformSource.h"
 #include "XMLNSNames.h"
 #include "XMLDocumentParserScope.h"
 #include <libxml/parserInternals.h>
+#include <wtf/Ref.h>
 #include <wtf/StringExtras.h>
+#include <wtf/Threading.h>
+#include <wtf/Vector.h>
 #include <wtf/unicode/UTF8.h>
 
 #if ENABLE(XSLT)
 namespace WebCore {
 
 #if ENABLE(XSLT)
-
-static inline bool shouldRenderInXMLTreeViewerMode(Document& document)
+static inline bool hasNoStyleInformation(Document* document)
 {
-    if (document.sawElementsInKnownNamespaces())
+    if (document->sawElementsInKnownNamespaces())
         return false;
 
-    if (document.transformSourceDocument())
+    if (document->transformSourceDocument())
         return false;
 
-    auto* frame = document.frame();
-    if (!frame)
+    if (!document->frame() || !document->frame()->page())
         return false;
 
-    if (!frame->settings().developerExtrasEnabled())
+    if (!document->frame()->page()->settings().developerExtrasEnabled())
         return false;
 
-    if (frame->tree().parent())
+    if (document->frame()->tree().parent())
         return false; // This document is not in a top frame
 
     return true;
 }
-
 #endif
 
 class PendingCallbacks {
-    WTF_MAKE_FAST_ALLOCATED;
+    WTF_MAKE_NONCOPYABLE(PendingCallbacks); WTF_MAKE_FAST_ALLOCATED;
 public:
+    PendingCallbacks() = default;
+
     void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int numNamespaces, const xmlChar** namespaces, int numAttributes, int numDefaulted, const xmlChar** attributes)
     {
         auto callback = std::make_unique<PendingStartElementNSCallback>();
@@ -564,16 +575,40 @@ bool XMLDocumentParser::supportsXMLVersion(const String& version)
 XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
     : ScriptableDocumentParser(document)
     , m_view(frameView)
+    , m_context(nullptr)
     , m_pendingCallbacks(std::make_unique<PendingCallbacks>())
+    , m_depthTriggeringEntityExpansion(-1)
+    , m_isParsingEntityDeclaration(false)
     , m_currentNode(&document)
+    , m_sawError(false)
+    , m_sawCSS(false)
+    , m_sawXSLTransform(false)
+    , m_sawFirstElement(false)
+    , m_isXHTMLDocument(false)
+    , m_parserPaused(false)
+    , m_requestingScript(false)
+    , m_finishCalled(false)
     , m_scriptStartPosition(TextPosition::belowRangePosition())
+    , m_parsingFragment(false)
 {
 }
 
 XMLDocumentParser::XMLDocumentParser(DocumentFragment& fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
     : ScriptableDocumentParser(fragment.document(), parserContentPolicy)
+    , m_view(nullptr)
+    , m_context(nullptr)
     , m_pendingCallbacks(std::make_unique<PendingCallbacks>())
+    , m_depthTriggeringEntityExpansion(-1)
+    , m_isParsingEntityDeclaration(false)
     , m_currentNode(&fragment)
+    , m_sawError(false)
+    , m_sawCSS(false)
+    , m_sawXSLTransform(false)
+    , m_sawFirstElement(false)
+    , m_isXHTMLDocument(false)
+    , m_parserPaused(false)
+    , m_requestingScript(false)
+    , m_finishCalled(false)
     , m_scriptStartPosition(TextPosition::belowRangePosition())
     , m_parsingFragment(true)
 {
@@ -1159,7 +1194,8 @@ static xmlEntityPtr sharedXHTMLEntity()
 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
 {
     const char* originalTarget = target;
-    auto conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, utf16Entity + numberOfCodeUnits, &target, target + targetSize);
+    WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
+        utf16Entity + numberOfCodeUnits, &target, target + targetSize);
     if (conversionResult != WTF::Unicode::conversionOK)
         return 0;
 
@@ -1328,7 +1364,7 @@ void XMLDocumentParser::doEnd()
     }
 
 #if ENABLE(XSLT)
-    bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && shouldRenderInXMLTreeViewerMode(*document());
+    bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
     if (xmlViewerMode) {
         XMLTreeViewer xmlTreeViewer(*document());
         xmlTreeViewer.transformDocumentToTreeView();
@@ -1414,12 +1450,13 @@ void XMLDocumentParser::resumeParsing()
             return;
     }
 
+    // Then, write any pending data
+    SegmentedString rest = m_pendingSrc;
+    m_pendingSrc.clear();
     // There is normally only one string left, so toString() shouldn't copy.
     // In any case, the XML parser runs on the main thread and it's OK if
     // the passed string has more than one reference.
-    auto rest = m_pendingSrc.toString();
-    m_pendingSrc.clear();
-    append(rest.impl());
+    append(rest.toString().impl());
 
     // Finally, if finish() has been called and write() didn't result
     // in any further callbacks being queued, call end()