Reviewed by Alice.
- support for change that should fix <rdar://problem/
4733044>
REGRESSION: XML iBench shows 10% perf. regression (copying
strings while decoding)
* wtf/Vector.h: Changed VectorBuffer so that the general case
contains an instance of the 0 case, since deriving from it
was violating the Liskov Substitution Principle.
(WTF::VectorBuffer::releaseBuffer): Added. Releases the buffer so it can
be adopted by another data structure that uses the FastMalloc.h allocator.
Returns 0 if the internal buffer was being used.
(WTF::Vector::releaseBuffer): Added. Releases the buffer as above or creates
a new one in the case where the internal buffer was being used.
WebCore:
Reviewed by Alice.
- change that should fix <rdar://problem/
4733044> REGRESSION: XML iBench shows
10% perf. regression (copying strings while decoding)
Use Vector<UChar> instead of String when building up the decoded string in
the ICU and Mac decoders. Using String leads to O(n^2) behavior because
String grows the buffer every single time that append is called. Using
Vector::append instead of String::append also avoids constructing a string
each time just to append and a questionable copy that is done inside the
String::append function which also contributed to the slowness.
* platform/PlatformString.h:
* platform/String.cpp: (WebCore::String::adopt): Added. Makes a String from a
Vector<UChar>, adopting the buffer from the vector to avoid copying and memory
allocation.
* platform/StringImpl.h:
* platform/StringImpl.cpp: (WebCore::StringImpl::adopt): Ditto.
* platform/StreamingTextDecoder.h:
* platform/StreamingTextDecoder.cpp: (WebCore::TextCodec::appendOmittingBOM):
Change to use a Vector<UChar> instead of a String, since vectors have better
resizing performance (they store a separate capacity).
* platform/StreamingTextDecoderICU.cpp: (WebCore::TextCodecICU::decode):
* platform/mac/StreamingTextDecoderMac.cpp: (WebCore::TextCodecMac::decode):
Change to use Vector<UChar> instead of String and then create a string at
the end of the process using the new adopt function.
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@16622
268f45cc-cd09-0410-ab3c-
d52691b4dbfc
+2006-09-28 Darin Adler <darin@apple.com>
+
+ Reviewed by Alice.
+
+ - support for change that should fix <rdar://problem/4733044>
+ REGRESSION: XML iBench shows 10% perf. regression (copying
+ strings while decoding)
+
+ * wtf/Vector.h: Changed VectorBuffer so that the general case
+ contains an instance of the 0 case, since deriving from it
+ was violating the Liskov Substitution Principle.
+ (WTF::VectorBuffer::releaseBuffer): Added. Releases the buffer so it can
+ be adopted by another data structure that uses the FastMalloc.h allocator.
+ Returns 0 if the internal buffer was being used.
+ (WTF::Vector::releaseBuffer): Added. Releases the buffer as above or creates
+ a new one in the case where the internal buffer was being used.
+
2006-09-28 Maciej Stachowiak <mjs@apple.com>
Reviewed by Geoff.
class VectorBuffer;
template<typename T>
- class VectorBuffer<T, 0>
- {
+ class VectorBuffer<T, 0> {
public:
VectorBuffer()
- : m_capacity(0)
- , m_buffer(0)
+ : m_buffer(0), m_capacity(0)
{
}
VectorBuffer(size_t capacity)
+#if !ASSERT_DISABLED
: m_capacity(0)
+#endif
{
allocateBuffer(capacity);
}
deallocateBuffer(m_buffer);
}
- void deallocateBuffer(T* buffer)
+ static void deallocateBuffer(T* buffer)
{
fastFree(buffer);
}
m_capacity = newCapacity;
if (newCapacity > std::numeric_limits<size_t>::max() / sizeof(T))
abort();
- m_buffer = reinterpret_cast<T*>(fastMalloc(newCapacity * sizeof(T)));
+ m_buffer = static_cast<T*>(fastMalloc(newCapacity * sizeof(T)));
}
T* buffer() { return m_buffer; }
const T* buffer() const { return m_buffer; }
size_t capacity() const { return m_capacity; }
+ T* releaseBuffer()
+ {
+ T* buffer = m_buffer;
+ m_buffer = 0;
+ m_capacity = 0;
+ return buffer;
+ }
+
protected:
VectorBuffer(T* buffer, size_t capacity)
- : m_capacity(capacity)
- , m_buffer(buffer)
+ : m_buffer(buffer), m_capacity(capacity)
{
}
+ T* m_buffer;
+
+ private:
size_t m_capacity;
- T *m_buffer;
};
template<typename T, size_t inlineCapacity>
- class VectorBuffer : public VectorBuffer<T, 0> {
+ class VectorBuffer : private VectorBuffer<T, 0> {
private:
typedef VectorBuffer<T, 0> BaseBuffer;
public:
- VectorBuffer()
+ VectorBuffer()
: BaseBuffer(inlineBuffer(), inlineCapacity)
{
}
: BaseBuffer(inlineBuffer(), inlineCapacity)
{
if (capacity > inlineCapacity)
- BaseBuffer::allocateBuffer(capacity);
+ allocateBuffer(capacity);
}
~VectorBuffer()
{
- if (BaseBuffer::buffer() == inlineBuffer())
+ if (buffer() == inlineBuffer())
BaseBuffer::m_buffer = 0;
}
BaseBuffer::deallocateBuffer(buffer);
}
+ using BaseBuffer::allocateBuffer;
+
+ using BaseBuffer::buffer;
+ using BaseBuffer::capacity;
+
+ T* releaseBuffer()
+ {
+ if (buffer() == inlineBuffer())
+ return 0;
+ return BaseBuffer::releaseBuffer();
+ }
+
private:
static const size_t m_inlineBufferSize = inlineCapacity * sizeof(T);
- T *inlineBuffer() { return reinterpret_cast<T *>(&m_inlineBuffer); }
+ T* inlineBuffer() { return reinterpret_cast<T*>(&m_inlineBuffer); }
+
char m_inlineBuffer[m_inlineBufferSize];
};
void fill(const T& val, size_t size);
void fill(const T& val) { fill(val, size()); }
+ T* releaseBuffer();
+
private:
void expandCapacity(size_t newMinCapacity);
const T* expandCapacity(size_t newMinCapacity, const T*);
--m_size;
}
+ template<typename T, size_t inlineCapacity>
+ T* Vector<T, inlineCapacity>::releaseBuffer()
+ {
+ T* buffer = m_impl.releaseBuffer();
+ if (!buffer && m_size) {
+ // If the vector had some data, but no buffer to release,
+ // that means it was using the inline buffer. In that case,
+ // we create a brand new buffer so the caller always gets one.
+ size_t bytes = m_size * sizeof(T);
+ buffer = static_cast<T*>(fastMalloc(bytes));
+ memcpy(buffer, data(), bytes);
+ }
+ m_size = 0;
+ return buffer;
+ }
+
template<typename T, size_t inlineCapacity>
void deleteAllValues(const Vector<T, inlineCapacity>& collection)
{
+2006-09-28 Darin Adler <darin@apple.com>
+
+ Reviewed by Alice.
+
+ - change that should fix <rdar://problem/4733044> REGRESSION: XML iBench shows
+ 10% perf. regression (copying strings while decoding)
+
+ Use Vector<UChar> instead of String when building up the decoded string in
+ the ICU and Mac decoders. Using String leads to O(n^2) behavior because
+ String grows the buffer every single time that append is called. Using
+ Vector::append instead of String::append also avoids constructing a string
+ each time just to append and a questionable copy that is done inside the
+ String::append function which also contributed to the slowness.
+
+ * platform/PlatformString.h:
+ * platform/String.cpp: (WebCore::String::adopt): Added. Makes a String from a
+ Vector<UChar>, adopting the buffer from the vector to avoid copying and memory
+ allocation.
+ * platform/StringImpl.h:
+ * platform/StringImpl.cpp: (WebCore::StringImpl::adopt): Ditto.
+
+ * platform/StreamingTextDecoder.h:
+ * platform/StreamingTextDecoder.cpp: (WebCore::TextCodec::appendOmittingBOM):
+ Change to use a Vector<UChar> instead of a String, since vectors have better
+ resizing performance (they store a separate capacity).
+
+ * platform/StreamingTextDecoderICU.cpp: (WebCore::TextCodecICU::decode):
+ * platform/mac/StreamingTextDecoderMac.cpp: (WebCore::TextCodecMac::decode):
+ Change to use Vector<UChar> instead of String and then create a string at
+ the end of the process using the new adopt function.
+
2006-09-28 Sam Weinig <sam.weinig@gmail.com>
Reviewed by Tim H.
String(StringImpl* i) : m_impl(i) { }
static String newUninitialized(size_t length, UChar*& characterBuffer);
+ static String adopt(Vector<UChar>&);
operator KJS::Identifier() const;
operator KJS::UString() const;
// We strip BOM characters because they can show up both at the start of content
// and inside content, and we never want them to end up in the decoded text.
-void TextCodec::appendOmittingBOM(String& s, const UChar* characters, size_t length)
+void TextCodec::appendOmittingBOM(Vector<UChar>& v, const UChar* characters, size_t length)
{
size_t start = 0;
for (size_t i = 0; i != length; ++i) {
if (BOM == characters[i]) {
if (start != i)
- s.append(String(&characters[start], i - start));
+ v.append(&characters[start], i - start);
start = i + 1;
}
}
if (start != length)
- s.append(String(&characters[start], length - start));
+ v.append(&characters[start], length - start);
}
} // namespace WebCore
#include "UChar.h"
#include <memory>
#include <wtf/Noncopyable.h>
+#include <wtf/Vector.h>
namespace WebCore {
virtual CString encode(const UChar*, size_t length, bool allowEntities = false) = 0;
protected:
- static void appendOmittingBOM(String&, const UChar*, size_t length);
+ static void appendOmittingBOM(Vector<UChar>&, const UChar*, size_t length);
};
typedef void (*EncodingNameRegistrar)(const char* alias, const char* name);
}
}
- String result = "";
+ Vector<UChar> result;
UChar buffer[ConversionBufferSize];
const char* source = reinterpret_cast<const char*>(bytes);
return String();
}
- return result;
+ return String::adopt(result);
}
CString TextCodecICU::encode(const UChar* characters, size_t length, bool allowEntities)
return StringImpl::newUninitialized(length, characterBuffer);
}
+String String::adopt(Vector<UChar>& buffer)
+{
+ // For an empty buffer, construct an empty string, not a null string,
+ // and use a standard constructor so we get the shared empty string.
+ if (buffer.isEmpty())
+ return "";
+ return StringImpl::adopt(buffer);
+}
+
} // namespace WebCore
#ifndef NDEBUG
return result;
}
+StringImpl* StringImpl::adopt(Vector<UChar>& buffer)
+{
+ size_t size = buffer.size();
+ UChar* data = buffer.releaseBuffer();
+ data = static_cast<UChar*>(fastRealloc(data, size * sizeof(UChar)));
+
+ StringImpl* result = new StringImpl;
+ result->m_length = size;
+ result->m_data = data;
+ return result;
+}
+
} // namespace WebCore
~StringImpl();
static StringImpl* newUninitialized(size_t length, UChar*& characterBuffer);
+ static StringImpl* adopt(Vector<UChar>&);
const UChar* characters() const { return m_data; }
unsigned length() const { return m_length; }
if (!m_converterTEC && createTECConverter() != noErr)
return String();
- String result("");
+ Vector<UChar> result;
const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes);
int sourceLength = length;
appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar));
}
+ String resultString = String::adopt(result);
+
// Workaround for a bug in the Text Encoding Converter (see bug 3225472).
// Simplified Chinese pages use the code U+A3A0 to mean "full-width space".
// But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
// To work around, just change all occurences of U+E5E5 to U+3000 (ideographic space).
if (m_encoding == kCFStringEncodingGB_18030_2000)
- result.replace(0xE5E5, 0x3000);
+ resultString.replace(0xE5E5, 0x3000);
- return result;
+ return resultString;
}
CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowEntities)