+2017-12-02 Darin Adler <darin@apple.com>
+
+ Modernize some aspects of text codecs, eliminate WebKit use of strcasecmp
+ https://bugs.webkit.org/show_bug.cgi?id=180009
+
+ Reviewed by Alex Christensen.
+
+ * bytecode/ArrayProfile.cpp: Removed include of StringExtras.h.
+ * bytecode/CodeBlock.cpp: Ditto.
+ * bytecode/ExecutionCounter.cpp: Ditto.
+ * runtime/ConfigFile.cpp: Ditto.
+ * runtime/DatePrototype.cpp: Ditto.
+ * runtime/IndexingType.cpp: Ditto.
+ * runtime/JSCJSValue.cpp: Ditto.
+ * runtime/JSDateMath.cpp: Ditto.
+ * runtime/JSGlobalObjectFunctions.cpp: Ditto.
+ * runtime/Options.cpp: Ditto.
+ (JSC::parse): Use equalLettersIgnoringASCIICase instead of strcasecmp.
+
2017-12-06 Saam Barati <sbarati@apple.com>
ASSERTION FAILED: vm->currentThreadIsHoldingAPILock() in void JSC::sanitizeStackForVM(JSC::VM *)
#include "CodeBlock.h"
#include "JSCInlines.h"
#include <wtf/CommaPrinter.h>
-#include <wtf/StringExtras.h>
#include <wtf/StringPrintStream.h>
namespace JSC {
#include <wtf/BagToHashMap.h>
#include <wtf/CommaPrinter.h>
#include <wtf/SimpleStats.h>
-#include <wtf/StringExtras.h>
#include <wtf/StringPrintStream.h>
#include <wtf/text/UniquedStringImpl.h>
#include "ExecutableAllocator.h"
#include "JSCInlines.h"
#include "VMInlines.h"
-#include <wtf/StringExtras.h>
namespace JSC {
#include <string.h>
#include <wtf/ASCIICType.h>
#include <wtf/DataLog.h>
-#include <wtf/StringExtras.h>
#include <wtf/text/StringBuilder.h>
#if HAVE(REGEX_H)
#include <time.h>
#include <wtf/Assertions.h>
#include <wtf/MathExtras.h>
-#include <wtf/StringExtras.h>
#if HAVE(LANGINFO_H)
#include <langinfo.h>
#include "config.h"
#include "IndexingType.h"
-#include <wtf/StringExtras.h>
-
namespace JSC {
IndexingType leastUpperBoundOfIndexingTypes(IndexingType a, IndexingType b)
#include "JSGlobalObject.h"
#include "NumberObject.h"
#include <wtf/MathExtras.h>
-#include <wtf/StringExtras.h>
namespace JSC {
#include <wtf/CurrentTime.h>
#include <wtf/MathExtras.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/StringExtras.h>
#if HAVE(ERRNO_H)
#include <errno.h>
#include <wtf/Assertions.h>
#include <wtf/HexNumber.h>
#include <wtf/MathExtras.h>
-#include <wtf/StringExtras.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/unicode/UTF8.h>
#include <wtf/DataLog.h>
#include <wtf/NumberOfCores.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/StringExtras.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/threads/Signals.h>
static bool parse(const char* string, bool& value)
{
- if (!strcasecmp(string, "true") || !strcasecmp(string, "yes") || !strcmp(string, "1")) {
+ if (equalLettersIgnoringASCIICase(string, "true") || equalLettersIgnoringASCIICase(string, "yes") || !strcmp(string, "1")) {
value = true;
return true;
}
- if (!strcasecmp(string, "false") || !strcasecmp(string, "no") || !strcmp(string, "0")) {
+ if (equalLettersIgnoringASCIICase(string, "false") || equalLettersIgnoringASCIICase(string, "no") || !strcmp(string, "0")) {
value = false;
return true;
}
static bool parse(const char* string, GCLogging::Level& value)
{
- if (!strcasecmp(string, "none") || !strcasecmp(string, "no") || !strcasecmp(string, "false") || !strcmp(string, "0")) {
+ if (equalLettersIgnoringASCIICase(string, "none") || equalLettersIgnoringASCIICase(string, "no") || equalLettersIgnoringASCIICase(string, "false") || !strcmp(string, "0")) {
value = GCLogging::None;
return true;
}
- if (!strcasecmp(string, "basic") || !strcasecmp(string, "yes") || !strcasecmp(string, "true") || !strcmp(string, "1")) {
+ if (equalLettersIgnoringASCIICase(string, "basic") || equalLettersIgnoringASCIICase(string, "yes") || equalLettersIgnoringASCIICase(string, "true") || !strcmp(string, "1")) {
value = GCLogging::Basic;
return true;
}
- if (!strcasecmp(string, "verbose") || !strcmp(string, "2")) {
+ if (equalLettersIgnoringASCIICase(string, "verbose") || !strcmp(string, "2")) {
value = GCLogging::Verbose;
return true;
}
+2017-12-02 Darin Adler <darin@apple.com>
+
+ Modernize some aspects of text codecs, eliminate WebKit use of strcasecmp
+ https://bugs.webkit.org/show_bug.cgi?id=180009
+
+ Reviewed by Alex Christensen.
+
+ * wtf/Assertions.cpp: Removed include of StringExtras.h.
+ (WTFLogChannelByName): Use equalIgnoringASCIICase instead of strcasecmp.
+ * wtf/DateMath.cpp: Removed include of StringExtras.h.
+ * wtf/MD5.cpp: Ditto. Also removed include of CString.h.
+ * wtf/SHA1.cpp: Ditto.
+
+ * wtf/StringExtras.h:
+ (strncasecmp): Deleted.
+ (strcasecmp): Deleted.
+
+ * wtf/StringPrintStream.cpp: Removed include of StringExtras.h.
+ * wtf/text/Base64.cpp: Ditto.
+
+ * wtf/text/LineEnding.cpp:
+ (WTF::normalizeLineEndingsToLF): Replaced old more general purpose function with
+ this. Changed argument type to vector and used an rvalue reference and return value.
+ Also fixed some small logic errors.
+ (WTF::normalizeLineEndingsToCRLF): Ditto.
+ (WTF::normalizeLineEndingsToNative): Updated for above changes.
+ * wtf/text/LineEnding.h: Updated for above changes.
+
+ * wtf/text/StringCommon.h:
+ (WTF::equalIgnoringASCIICase): Added overload for const char*.
+ (WTF::equalLettersIgnoringASCIICase): Ditto.
+
+ * wtf/text/TextStream.cpp: Removed include of StringExtras.h.
+ * wtf/text/WTFString.cpp: Ditto.
+ * wtf/unicode/icu/CollatorICU.cpp: Ditto.
+
2017-12-06 Yusuke Suzuki <utatane.tea@gmail.com>
[WTF] Remove XXXLockBase since constexpr constructor can initialize static variables without calling global constructors
#include <wtf/RetainPtr.h>
#include <wtf/StackTrace.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/StringExtras.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/text/WTFString.h>
{
for (size_t i = 0; i < count; ++i) {
WTFLogChannel* channel = channels[i];
- if (!strcasecmp(name, channel->name))
+ if (equalIgnoringASCIICase(name, channel->name))
return channel;
}
#include "CurrentTime.h"
#include "MathExtras.h"
#include "StdLibExtras.h"
-#include "StringExtras.h"
#include <algorithm>
#include <limits.h>
#include "MD5.h"
#include "Assertions.h"
-#ifndef NDEBUG
-#include "StringExtras.h"
-#include "text/CString.h"
-#endif
#include <wtf/StdLibExtras.h>
namespace WTF {
#include "SHA1.h"
#include "Assertions.h"
-
-#include "StringExtras.h"
#include "text/CString.h"
namespace WTF {
/*
- * Copyright (C) 2006, 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2006-2017 Apple Inc. All rights reserved.
* Copyright (C) 2015 Electronic Arts, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef WTF_StringExtras_h
-#define WTF_StringExtras_h
+#pragma once
-#include <stdarg.h>
-#include <stdio.h>
#include <string.h>
-#if HAVE(STRINGS_H)
-#include <strings.h>
-#endif
-
-#if COMPILER(MSVC)
-
-// FIXME: We should stop using these entirely and use suitable versions of equalIgnoringASCIICase instead.
-
-inline int strncasecmp(const char* s1, const char* s2, size_t len)
-{
- return _strnicmp(s1, s2, len);
-}
-
-inline int strcasecmp(const char* s1, const char* s2)
-{
- return _stricmp(s1, s2);
-}
-
-#endif
-
#if !HAVE(STRNSTR)
inline char* strnstr(const char* buffer, const char* target, size_t bufferLength)
}
#endif
-
-#endif // WTF_StringExtras_h
#include <stdarg.h>
#include <stdio.h>
#include <wtf/FastMalloc.h>
-#include <wtf/StringExtras.h>
namespace WTF {
#include "Base64.h"
#include <limits.h>
-#include <wtf/StringExtras.h>
#include <wtf/text/WTFString.h>
namespace WTF {
/*
- * Copyright (C) 2005, 2006, 2008, 2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2005-2017 Apple Inc. All rights reserved.
* Copyright (C) 2010 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#include "config.h"
#include "LineEnding.h"
-#include <wtf/text/CString.h>
-#include <wtf/text/WTFString.h>
+#include <wtf/Vector.h>
-namespace {
-
-class OutputBuffer {
-public:
- virtual uint8_t* allocate(size_t) = 0;
- virtual void copy(const CString&) = 0;
- virtual ~OutputBuffer() { }
-};
-
-class CStringBuffer : public OutputBuffer {
-public:
- CStringBuffer(CString& buffer)
- : m_buffer(buffer)
- {
- }
- virtual ~CStringBuffer() { }
-
- uint8_t* allocate(size_t size) override
- {
- char* ptr;
- m_buffer = CString::newUninitialized(size, ptr);
- return reinterpret_cast<uint8_t*>(ptr);
- }
-
- void copy(const CString& source) override
- {
- m_buffer = source;
- }
-
- const CString& buffer() const { return m_buffer; }
-
-private:
- CString m_buffer;
-};
-
-#if OS(WINDOWS)
-class VectorCharAppendBuffer : public OutputBuffer {
-public:
- VectorCharAppendBuffer(Vector<uint8_t>& buffer)
- : m_buffer(buffer)
- {
- }
- virtual ~VectorCharAppendBuffer() { }
-
- uint8_t* allocate(size_t size) override
- {
- size_t oldSize = m_buffer.size();
- m_buffer.grow(oldSize + size);
- return m_buffer.data() + oldSize;
- }
-
- void copy(const CString& source) override
- {
- m_buffer.append(source.data(), source.length());
- }
-
-private:
- Vector<uint8_t>& m_buffer;
-};
-#endif
+namespace WTF {
-void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffer)
+Vector<uint8_t> normalizeLineEndingsToLF(Vector<uint8_t>&& vector)
{
- if (!from.length())
- return;
- // Compute the new length.
- size_t newLen = 0;
- const char* p = from.data();
- while (p < from.data() + from.length()) {
- char c = *p++;
- if (c == '\r') {
- // Safe to look ahead because of trailing '\0'.
- if (*p != '\n') {
- // Turn CR into CRLF.
- newLen += 2;
- }
- } else if (c == '\n') {
- // Turn LF into CRLF.
- newLen += 2;
- } else {
- // Leave other characters alone.
- newLen += 1;
- }
- }
- if (newLen < from.length())
- return;
-
- if (newLen == from.length()) {
- buffer.copy(from);
- return;
- }
-
- p = from.data();
- uint8_t* q = buffer.allocate(newLen);
-
- // Make a copy of the string.
- while (p < from.data() + from.length()) {
- char c = *p++;
- if (c == '\r') {
- // Safe to look ahead because of trailing '\0'.
- if (*p != '\n') {
- // Turn CR into CRLF.
- *q++ = '\r';
- *q++ = '\n';
- }
- } else if (c == '\n') {
- // Turn LF into CRLF.
- *q++ = '\r';
+ auto q = vector.data();
+ for (auto p = vector.data(), end = p + vector.size(); p != end; ) {
+ auto character = *p++;
+ if (character == '\r') {
+ // Turn CRLF and CR into LF.
+ if (p != end && *p == '\n')
+ ++p;
*q++ = '\n';
} else {
// Leave other characters alone.
- *q++ = c;
+ *q++ = character;
}
}
+ vector.shrink(q - vector.data());
+ return WTFMove(vector);
}
-};
-
-namespace WTF {
-
-// Normalize all line-endings to CR or LF.
-static void normalizeToCROrLF(const CString& from, Vector<uint8_t>& result, bool toCR)
+Vector<uint8_t> normalizeLineEndingsToCRLF(Vector<uint8_t>&& source)
{
- // Compute the new length.
- size_t newLen = 0;
- bool needFix = false;
- const char* p = from.data();
- char fromEndingChar = toCR ? '\n' : '\r';
- char toEndingChar = toCR ? '\r' : '\n';
- while (p < from.data() + from.length()) {
- char c = *p++;
- if (c == '\r' && *p == '\n') {
- // Turn CRLF into CR or LF.
- p++;
- needFix = true;
- } else if (c == fromEndingChar) {
- // Turn CR/LF into LF/CR.
- needFix = true;
+ size_t resultLength = 0;
+ for (auto p = source.data(), end = p + source.size(); p != end; ) {
+ auto character = *p++;
+ if (character == '\r') {
+ // Turn CR or CRLF into CRLF.
+ if (p != end && *p == '\n')
+ ++p;
+ resultLength += 2;
+ } else if (character == '\n') {
+ // Turn LF into CRLF.
+ resultLength += 2;
+ } else {
+ // Leave other characters alone.
+ resultLength += 1;
}
- newLen += 1;
}
- // Grow the result buffer.
- p = from.data();
- size_t oldResultSize = result.size();
- result.grow(oldResultSize + newLen);
- uint8_t* q = result.data() + oldResultSize;
-
- // If no need to fix the string, just copy the string over.
- if (!needFix) {
- memcpy(q, p, from.length());
- return;
- }
-
- // Make a copy of the string.
- while (p < from.data() + from.length()) {
- char c = *p++;
- if (c == '\r' && *p == '\n') {
- // Turn CRLF or CR into CR or LF.
- p++;
- *q++ = toEndingChar;
- } else if (c == fromEndingChar) {
- // Turn CR/LF into LF/CR.
- *q++ = toEndingChar;
+ if (resultLength == source.size())
+ return WTFMove(source);
+
+ Vector<uint8_t> result(resultLength);
+ auto q = result.data();
+ for (auto p = source.data(), end = p + source.size(); p != end; ) {
+ auto character = *p++;
+ if (character == '\r') {
+ // Turn CR or CRLF into CRLF.
+ if (p != end && *p == '\n')
+ ++p;
+ *q++ = '\r';
+ *q++ = '\n';
+ } else if (character == '\n') {
+ // Turn LF into CRLF.
+ *q++ = '\r';
+ *q++ = '\n';
} else {
// Leave other characters alone.
- *q++ = c;
+ *q++ = character;
}
}
+ ASSERT(q == result.data() + resultLength);
+ return result;
}
-CString normalizeLineEndingsToCRLF(const CString& from)
-{
- CString result;
- ::CStringBuffer buffer(result);
- internalNormalizeLineEndingsToCRLF(from, buffer);
- return buffer.buffer();
-}
-
-void normalizeAndAppendLineEndingsToNative(const CString& from, Vector<uint8_t>& result)
+Vector<uint8_t> normalizeLineEndingsToNative(Vector<uint8_t>&& from)
{
#if OS(WINDOWS)
- VectorCharAppendBuffer buffer(result);
- internalNormalizeLineEndingsToCRLF(from, buffer);
+ return normalizeLineEndingsToCRLF(WTFMove(from));
#else
- normalizeToCROrLF(from, result, false);
+ return normalizeLineEndingsToLF(WTFMove(from));
#endif
}
/*
- * Copyright (C) 2005, 2006, 2008, 2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2005-2017 Apple Inc. All rights reserved.
* Copyright (C) 2010 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#pragma once
#include <wtf/Forward.h>
-#include <wtf/Vector.h>
namespace WTF {
-// Normalize all line-endings in the given string to CRLF.
-WTF_EXPORT CString normalizeLineEndingsToCRLF(const CString& from);
+// Normalize all line-endings in the given byte vector to LF or to CRLF, respectively.
+WTF_EXPORT Vector<uint8_t> normalizeLineEndingsToLF(Vector<uint8_t>&&);
+WTF_EXPORT Vector<uint8_t> normalizeLineEndingsToCRLF(Vector<uint8_t>&&);
-// Normalize all line-endings in the given string to the native line-endings and append the result to the given buffer.
-// (Normalize to CRLF on Windows and normalize to LF on all other platforms.)
-WTF_EXPORT void normalizeAndAppendLineEndingsToNative(const CString& from, Vector<uint8_t>& result);
+// Normalize all line-endings to CRLF on Windows, to LF on all other platforms.
+WTF_EXPORT Vector<uint8_t> normalizeLineEndingsToNative(Vector<uint8_t>&&);
} // namespace WTF
using WTF::normalizeLineEndingsToCRLF;
-using WTF::normalizeAndAppendLineEndingsToNative;
+using WTF::normalizeLineEndingsToLF;
+using WTF::normalizeLineEndingsToNative;
template<typename StringClass, unsigned length> bool equalLettersIgnoringASCIICaseCommon(const StringClass&, const char (&lowercaseLetters)[length]);
+bool equalIgnoringASCIICase(const char*, const char*);
+template<unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const char*, const char (&lowercaseLetters)[lowercaseLettersLength]);
+
template<typename T>
inline T loadUnaligned(const char* s)
{
return startsWithLettersIgnoringASCIICaseCommonWithoutLength(string, pointer);
}
+inline bool equalIgnoringASCIICase(const char* a, const char* b)
+{
+ auto length = strlen(a);
+ return length == strlen(b) && equalIgnoringASCIICase(a, b, length);
+}
+
+template<unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const char* string, const char (&lowercaseLetters)[lowercaseLettersLength])
+{
+ auto length = strlen(lowercaseLetters);
+ return strlen(string) == length && equalLettersIgnoringASCIICase(string, lowercaseLetters, length);
+}
+
}
using WTF::equalIgnoringASCIICase;
#include "TextStream.h"
#include <wtf/MathExtras.h>
-#include <wtf/StringExtras.h>
#include <wtf/text/WTFString.h>
namespace WTF {
#include <wtf/MathExtras.h>
#include <wtf/NeverDestroyed.h>
#include <wtf/text/CString.h>
-#include <wtf/StringExtras.h>
#include <wtf/Vector.h>
#include <wtf/dtoa.h>
#include <wtf/unicode/CharacterNames.h>
#include <mutex>
#include <unicode/ucol.h>
#include <wtf/Lock.h>
-#include <wtf/StringExtras.h>
#include <wtf/text/StringView.h>
#if OS(DARWIN) && USE(CF)
+2017-12-02 Darin Adler <darin@apple.com>
+
+ Modernize some aspects of text codecs, eliminate WebKit use of strcasecmp
+ https://bugs.webkit.org/show_bug.cgi?id=180009
+
+ Reviewed by Alex Christensen.
+
+ * Modules/fetch/FetchBody.cpp:
+ (WebCore::FetchBody::consumeAsStream): Update to use size since the result of
+ encode is now Vector rather than CString. And for the new UnencodableHandling.
+ (WebCore::FetchBody::consumeText): Removed now-unneeded type cast. Ditto.
+ (WebCore::FetchBody::bodyAsFormData const): Ditto.
+ (WebCore::FetchBody::take): Pass result of encode directly to SharedBuffer::create.
+
+ * Modules/websockets/WebSocketDeflater.cpp: Removed include of StringExtras.h.
+ * bridge/IdentifierRep.h: Ditto.
+ * bridge/c/c_instance.cpp: Ditto.
+
+ * fileapi/BlobBuilder.cpp:
+ (WebCore::BlobBuilder::append): Updated for CString to Vector change and
+ for UnencodableHandling.
+
+ * html/parser/HTMLMetaCharsetParser.cpp:
+ (WebCore::HTMLMetaCharsetParser::checkForMetaCharset): Call the decode function
+ with all the arguments since there is only one decode function now.
+
+ * inspector/agents/InspectorDOMAgent.cpp:
+ (WebCore::computeContentSecurityPolicySHA256Hash): Updated for CString to Vector
+ change and for UnencodableHandling.
+
+ * loader/ResourceCryptographicDigest.cpp:
+ (WebCore::cryptographicDigestForBytes): Changed argument type to const void*.
+ * loader/ResourceCryptographicDigest.h: Ditto.
+
+ * loader/TextResourceDecoder.cpp:
+ (WebCore::TextResourceDecoder::TextResourceDecoder): Moved initialization of data
+ members to class definition.
+ (WebCore::TextResourceDecoder::create): Moved function body here from the header.
+ (WebCore::TextResourceDecoder::setEncoding): Use equalLettersIgnoringASCIICase.
+ (WebCore::TextResourceDecoder::shouldAutoDetect const): Updated for name change
+ to m_parentFrameAutoDetectedEncoding, which obviates a comment.
+ (WebCore::TextResourceDecoder::flush): Ditto.
+ * loader/TextResourceDecoder.h: Moved initialization here from constructor.
+ Moved function bodies out of the class. Renamed m_hintEncoding to
+ m_parentFrameAutoDetectedEncoding since that is a more accurate description.
+
+ * loader/appcache/ApplicationCacheStorage.cpp: Removed include of StringExtras.h.
+ * page/FrameTree.cpp: Ditto.
+ * page/PageSerializer.cpp: Ditto.
+ (WebCore::PageSerializer::serializeFrame): Pass result of encode directly to
+ SharedBuffer::create and update for UnencodableHandling.
+ (WebCore::PageSerializer::serializeCSSStyleSheet): Ditto.
+ * page/csp/ContentSecurityPolicy.cpp:
+ (WebCore::ContentSecurityPolicy::findHashOfContentInPolicies const): Ditto.
+
+ * platform/FileHandle.cpp: Removed include of StringExtras.h.
+
+ * platform/SharedBuffer.cpp:
+ (WebCore::SharedBuffer::create): Added an overload that takes Vector<uint8_t>.
+ * platform/SharedBuffer.h: Ditto.
+
+ * platform/URLParser.cpp:
+ (WebCore::URLParser::encodeQuery): Updated since encode returns a Vector instead
+ of a CString now and for UnencodableHandling.
+ * platform/graphics/avfoundation/objc/MediaPlayerPrivateAVFoundationObjC.mm:
+ (WebCore::MediaPlayerPrivateAVFoundationObjC::shouldWaitForLoadingOfResource): Ditto.
+
+ * platform/graphics/ca/PlatformCALayer.cpp: Removed include of StringExtras.h.
+
+ * platform/network/curl/ResourceHandleCurlDelegate.cpp:
+ (WebCore::ResourceHandleCurlDelegate::handleDataURL): Updated for Vector instead
+ of CString.
+
+ * platform/network/FormData.cpp:
+ (WebCore::FormData::create): Added new overload, and simplified some existing ones.
+ (WebCore::normalizeStringData): Changed return type to Vector<uint8_t> and updated
+ for UnencodableHandling.
+ (WebCore::FormData::appendMultiPartStringValue): Updated for change in type of
+ result of normalizeStringData.
+ * platform/network/FormData.h: Updated for the above and updated comments.
+
+ * platform/network/FormDataBuilder.cpp: Made this a namespace instead of a class.
+ (WebCore::FormDataBuilder::append): Added an overload for Vector<uint8_t>.
+ (WebCore::FormDataBuilder::appendQuoted): Renamed from appendQuotedString and
+ changed the argument type.
+ (WebCore::FormDataBuilder::appendFormURLEncoded): Moved logic up from the
+ encodeStringAsFormData function into a new separate helper.
+ (WebCore::FormDataBuilder::addFilenameToMultiPartHeader): Updated for change to
+ UnencodableHandling.
+ (WebCore::FormDataBuilder::beginMultiPartHeader): Changed argument type.
+ (WebCore::FormDataBuilder::addKeyValuePairAsFormData): Ditto.
+ (WebCore::FormDataBuilder::encodeStringAsFormData): Updated to call helper.
+ * platform/network/FormDataBuilder.h: Updated for the above.
+
+ * platform/text/DecodeEscapeSequences.h: Use Vector<uint8_t> instead of
+ Vector<char>, also updated the code that calls encode for the new return type
+ and updated for change to UnencodableHandling.
+
+ * platform/text/TextCodec.cpp:
+ (WebCore::TextCodec::getUnencodableReplacement): Updated since we are using
+ std::array now, so the out argument is easier to understand, also updated for
+ change to UnencodableHandling.
+ * platform/text/TextCodec.h: Use std::array for the UnencodableReplacementArray
+ type, removed the overload of decode so there is only one decode function.
+
+ * platform/text/TextCodecICU.cpp:
+ (WebCore::ICUConverterWrapper::~ICUConverterWrapper): Deleted.
+ Not needed any more since we use ICUConverterPtr instead now.
+ (WebCore::cachedConverterICU): Deleted.
+ (WebCore::TextCodecICU::create): Deleted. Callers just use make_unique
+ directly now.
+ (WebCore::TextCodecICU::registerCodecs): Rewrote to use lambdas instead
+ of functions with void* pointers.
+ (WebCore::TextCodecICU::TextCodecICU): Moved initializers into the header.
+ (WebCore::TextCodecICU::~TextCodecICU): Moved the body of releaseICUConverter
+ in here. Also greatly simplified it now that ICUConverterPtr handles closing
+ it as needed.
+ (WebCore::TextCodecICU::releaseICUConverter const): Deleted.
+ (WebCore::TextCodecICU::createICUConverter const): Rewrote to simplify now that
+ we can use ICUConverterPtr.
+ (WebCore::ErrorCallbackSetter::ErrorCallbackSetter): Take a reference instead
+ of a pointer.
+ (WebCore::ErrorCallbackSetter::~ErrorCallbackSetter): Ditto.
+ (WebCore::TextCodecICU::decode): Use equalLettersIgnoringASCIICase instead of
+ strcasecmp.
+ (WebCore::TextCodecICU::encode): Return a Vector instead of a CString.
+ Take a StringView instead of a pointer and length. Simplified
+ the backslash-as-currency-symbol code by using String::replace.
+ * platform/text/TextCodecICU.h: Updated for above. Fixed indentation.
+ Added a new ICUConverterPtr typedef that uses std::unique_ptr to close the
+ converter; combined with move semantics it simplifies things so we don't have
+ to be so careful about calling ucnv_close.
+
+ * platform/text/TextCodecLatin1.cpp: Renamed the Latin-1 to Unicode table from
+ "table" to latin1ConversionTable.
+ (WebCore::TextCodecLatin1::registerCodecs): Use a lambda.
+ (WebCore::encodeComplexWindowsLatin1): Return a Vector instead of CString.
+ Also use StringView::codePoints instead of our own U16_NEXT.
+ (WebCore::TextCodecLatin1::encode): More of the same.
+ * platform/text/TextCodecLatin1.h: Updated for the above.
+
+ * platform/text/TextCodecReplacement.cpp:
+ (WebCore::TextCodecReplacement::create): Deleted.
+ (WebCore::TextCodecReplacement::TextCodecReplacement): Deleted.
+ (WebCore::TextCodecReplacement::registerCodecs): Use a lambda.
+ * platform/text/TextCodecReplacement.h: Updated for the above.
+
+ * platform/text/TextCodecUTF16.cpp:
+ (WebCore::newStreamingTextDecoderUTF16LE): Deleted.
+ (WebCore::newStreamingTextDecoderUTF16BE): Deleted.
+ (WebCore::TextCodecUTF16::registerCodecs): Use lambdas.
+ (WebCore::TextCodecUTF16::encode): Return a Vector.
+ * platform/text/TextCodecUTF16.h: Updated for the above.
+
+ * platform/text/TextCodecUTF8.cpp:
+ (WebCore::TextCodecUTF8::registerCodecs): Use a lambda.
+ (WebCore::TextCodecUTF8::encode): Return a Vector.
+ * platform/text/TextCodecUTF8.h: Updated for the above.
+
+ * platform/text/TextCodecUserDefined.cpp:
+ (WebCore::newStreamingTextDecoderUserDefined): Deleted.
+ (WebCore::TextCodecUserDefined::registerCodecs): Use a lambda.
+ (WebCore::encodeComplexUserDefined): Return a Vector.
+ (WebCore::TextCodecUserDefined::encode): Ditto.
+ * platform/text/TextCodecUserDefined.h: Updated for the above.
+
+ * platform/text/TextEncoding.cpp: Changed TextCodecMap to just hold a
+ WTF::Function instead of holding a function and an additional data pointer.
+ (WebCore::TextEncoding::TextEncoding): Use equalLettersIgnoringASCIICase
+ instead of a special isReplacementEncoding function.
+ (WebCore::TextEncoding::encode const): Return a Vector instead of a CString.
+ Pass StringView instead of a pointer and length.
+ * platform/text/TextEncoding.h: Updated for the above.
+
+ * platform/text/TextEncodingRegistry.cpp:
+ (WebCore::addToTextCodecMap): Removed the additionalData pointer and used
+ WTFMove since NewTextCodecFunction is now a WTF::Function rather than a C
+ function pointer.
+ (WebCore::isReplacementEncoding): Deleted.
+ (WebCore::newTextCodec): Use find instead of get now that the functions in
+ the map are WTF::Function and can't be copied.
+ (WebCore::dumpTextEncodingNameMap): Deleted.
+ * platform/text/TextEncodingRegistry.h: Updated for the above.
+
+ * platform/text/win/TextCodecWin.cpp:
+ (WebCore::TextCodecWin::encode): Updated comment.
+
+ * xml/XMLHttpRequest.cpp:
+ (WebCore::XMLHttpRequest::send): Updated for change to UnencodableHandling.
+
+ * xml/XSLTUnicodeSort.cpp: Removed include of StringExtras.h.
+ * xml/parser/XMLDocumentParser.cpp: Ditto.
+ * xml/parser/XMLDocumentParserLibxml2.cpp: Ditto.
+
2017-12-06 Said Abou-Hallawa <sabouhallawa@apple.com>
Support the decoding="sync/async" syntax for image async attribute
}
#if ENABLE(STREAMS_API)
+
void FetchBody::consumeAsStream(FetchBodyOwner& owner, FetchBodySource& source)
{
bool closeStream = false;
closeStream = source.enqueue(ArrayBuffer::tryCreate(arrayBufferViewBody().baseAddress(), arrayBufferViewBody().byteLength()));
m_data = nullptr;
} else if (isText()) {
- auto data = UTF8Encoding().encode(textBody(), EntitiesForUnencodables);
- closeStream = source.enqueue(ArrayBuffer::tryCreate(data.data(), data.length()));
+ auto data = UTF8Encoding().encode(textBody(), UnencodableHandling::Entities);
+ closeStream = source.enqueue(ArrayBuffer::tryCreate(data.data(), data.size()));
m_data = nullptr;
} else if (isURLSearchParams()) {
- auto data = UTF8Encoding().encode(urlSearchParamsBody().toString(), EntitiesForUnencodables);
- closeStream = source.enqueue(ArrayBuffer::tryCreate(data.data(), data.length()));
+ auto data = UTF8Encoding().encode(urlSearchParamsBody().toString(), UnencodableHandling::Entities);
+ closeStream = source.enqueue(ArrayBuffer::tryCreate(data.data(), data.size()));
m_data = nullptr;
} else if (isBlob()) {
owner.loadBlob(blobBody(), nullptr);
if (closeStream)
source.close();
}
+
#endif
void FetchBody::consumeArrayBuffer(Ref<DeferredPromise>&& promise)
void FetchBody::consumeText(Ref<DeferredPromise>&& promise, const String& text)
{
- auto data = UTF8Encoding().encode(text, EntitiesForUnencodables);
- m_consumer.resolveWithData(WTFMove(promise), reinterpret_cast<const uint8_t*>(data.data()), data.length());
+ auto data = UTF8Encoding().encode(text, UnencodableHandling::Entities);
+ m_consumer.resolveWithData(WTFMove(promise), data.data(), data.size());
m_data = nullptr;
}
RefPtr<FormData> FetchBody::bodyAsFormData(ScriptExecutionContext& context) const
{
if (isText())
- return FormData::create(UTF8Encoding().encode(textBody(), EntitiesForUnencodables));
+ return FormData::create(UTF8Encoding().encode(textBody(), UnencodableHandling::Entities));
if (isURLSearchParams())
- return FormData::create(UTF8Encoding().encode(urlSearchParamsBody().toString(), EntitiesForUnencodables));
+ return FormData::create(UTF8Encoding().encode(urlSearchParamsBody().toString(), UnencodableHandling::Entities));
if (isBlob()) {
RefPtr<FormData> body = FormData::create();
body->appendBlob(blobBody().url());
if (isFormData())
return formDataBody();
- if (isText()) {
- auto data = UTF8Encoding().encode(textBody(), EntitiesForUnencodables);
- return SharedBuffer::create(data.data(), data.length());
- }
-
- if (isURLSearchParams()) {
- auto data = UTF8Encoding().encode(urlSearchParamsBody().toString(), EntitiesForUnencodables);
- return SharedBuffer::create(data.data(), data.length());
- }
+ if (isText())
+ return SharedBuffer::create(UTF8Encoding().encode(textBody(), UnencodableHandling::Entities));
+ if (isURLSearchParams())
+ return SharedBuffer::create(UTF8Encoding().encode(urlSearchParamsBody().toString(), UnencodableHandling::Entities));
if (isArrayBuffer())
return SharedBuffer::create(reinterpret_cast<const char*>(arrayBufferBody().data()), arrayBufferBody().byteLength());
#include <wtf/FastMalloc.h>
#include <wtf/HashMap.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/StringExtras.h>
#include <wtf/text/StringHash.h>
#include <wtf/text/WTFString.h>
#include <zlib.h>
+2017-12-02 Darin Adler <darin@apple.com>
+
+ Modernize some aspects of text codecs, eliminate WebKit use of strcasecmp
+ https://bugs.webkit.org/show_bug.cgi?id=180009
+
+ Reviewed by Alex Christensen.
+
+ * PAL.xcodeproj/project.pbxproj: Added UnencodableHandling.h.
+ * pal/text/UnencodableHandling.h: Moved the UnencodableHandling enumeration
+ here from TextCodec.h and changed it to an enum class.
+
2017-12-04 David Quesada <david_quesada@apple.com>
Add a class for parsing application manifests
1C67CEA11E32EDA800F80F2E /* FeatureDefines.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = FeatureDefines.xcconfig; sourceTree = "<group>"; };
1C67CEA21E32EE2600F80F2E /* Version.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = Version.xcconfig; sourceTree = "<group>"; };
7A1656431F97B2B800BA3CE4 /* NSKeyedArchiverSPI.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NSKeyedArchiverSPI.h; sourceTree = "<group>"; };
+ 93E5909C1F93BF1E0067F8CF /* UnencodableHandling.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = UnencodableHandling.h; sourceTree = "<group>"; };
A10265861F56746100B4C844 /* FoundationSPI.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FoundationSPI.h; sourceTree = "<group>"; };
A10265881F56747A00B4C844 /* HIToolboxSPI.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HIToolboxSPI.h; sourceTree = "<group>"; };
A102658A1F56748C00B4C844 /* QuickDrawSPI.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = QuickDrawSPI.h; sourceTree = "<group>"; };
A30D41231F0DD10500B71954 /* mac */,
A30D411F1F0DD0EA00B71954 /* KillRing.cpp */,
A30D411E1F0DD0EA00B71954 /* KillRing.h */,
+ 93E5909C1F93BF1E0067F8CF /* UnencodableHandling.h */,
);
path = text;
sourceTree = "<group>";
--- /dev/null
+/*
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+namespace WebCore {
+
+// Specifies what will happen when a character is encountered that is
+// not encodable in the character set.
+enum class UnencodableHandling {
+ // Substitutes the replacement character "?".
+ QuestionMarks,
+
+ // Encodes the character as an XML entity. For example, U+06DE
+ // would be "&#1758;" (0x6DE = 1758 in decimal).
+ Entities,
+
+ // Encodes the character as an entity as above, but with
+ // non-alphanumeric characters percent-escaped. This is used in URLs.
+ // For example, U+6DE would be "%26%231758%3B".
+ URLEncodedEntities
+};
+
+}
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef IdentifierRep_h
-#define IdentifierRep_h
+#pragma once
#include <wtf/Assertions.h>
#include <wtf/FastMalloc.h>
-#include <wtf/StringExtras.h>
#include <string.h>
namespace WebCore {
} // namespace WebCore
-#endif // IdentifierRep_h
#include <wtf/Assertions.h>
#include <wtf/NeverDestroyed.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/StringExtras.h>
#include <wtf/Vector.h>
using namespace WebCore;
{
if (!arrayBuffer)
return;
- m_appendableData.append(static_cast<const char*>(arrayBuffer->data()), arrayBuffer->byteLength());
+ m_appendableData.append(static_cast<const uint8_t*>(arrayBuffer->data()), arrayBuffer->byteLength());
}
void BlobBuilder::append(RefPtr<ArrayBufferView>&& arrayBufferView)
{
if (!arrayBufferView)
return;
- m_appendableData.append(static_cast<const char*>(arrayBufferView->baseAddress()), arrayBufferView->byteLength());
+ m_appendableData.append(static_cast<const uint8_t*>(arrayBufferView->baseAddress()), arrayBufferView->byteLength());
}
void BlobBuilder::append(RefPtr<Blob>&& blob)
void BlobBuilder::append(const String& text)
{
- CString utf8Text = UTF8Encoding().encode(text, EntitiesForUnencodables);
+ auto bytes = UTF8Encoding().encode(text, UnencodableHandling::Entities);
if (m_endings == BlobLineEndings::Native)
- normalizeAndAppendLineEndingsToNative(utf8Text, m_appendableData);
+ bytes = normalizeLineEndingsToNative(WTFMove(bytes));
+
+ if (m_appendableData.isEmpty())
+ m_appendableData = WTFMove(bytes);
else {
- ASSERT(m_endings == BlobLineEndings::Transparent);
- m_appendableData.append(utf8Text.data(), utf8Text.length());
+ // FIXME: Would it be better to move multiple vectors into m_items instead of merging them into one?
+ m_appendableData.appendVector(bytes);
}
}
/*
* Copyright (C) 2010 Google Inc. All Rights Reserved.
- * Copyright (C) 2015-2016 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#include "HTMLNames.h"
#include "HTMLParserIdioms.h"
+#include "TextCodec.h"
#include "TextEncodingRegistry.h"
namespace WebCore {
constexpr int bytesToCheckUnconditionally = 1024;
- m_input.append(m_codec->decode(data, length));
+ bool ignoredSawErrorFlag;
+ m_input.append(m_codec->decode(data, length, false, false, ignoredSawErrorFlag));
while (auto token = m_tokenizer.nextToken(m_input)) {
bool isEnd = token->type() == HTMLToken::EndTag;
// See <https://bugs.webkit.org/show_bug.cgi?id=155184>.
TextEncoding documentEncoding = element.document().textEncoding();
const TextEncoding& encodingToUse = documentEncoding.isValid() ? documentEncoding : UTF8Encoding();
- CString content = encodingToUse.encode(TextNodeTraversal::contentsAsString(element), EntitiesForUnencodables);
+ auto content = encodingToUse.encode(TextNodeTraversal::contentsAsString(element), UnencodableHandling::Entities);
auto cryptoDigest = PAL::CryptoDigest::create(PAL::CryptoDigest::Algorithm::SHA_256);
- cryptoDigest->addBytes(content.data(), content.length());
- Vector<uint8_t> digest = cryptoDigest->computeHash();
+ cryptoDigest->addBytes(content.data(), content.size());
+ auto digest = cryptoDigest->computeHash();
return makeString("sha256-", base64Encode(digest.data(), digest.size()));
}
return PAL::CryptoDigest::Algorithm::SHA_512;
}
-ResourceCryptographicDigest cryptographicDigestForBytes(ResourceCryptographicDigest::Algorithm algorithm, const char* bytes, size_t length)
+ResourceCryptographicDigest cryptographicDigestForBytes(ResourceCryptographicDigest::Algorithm algorithm, const void* bytes, size_t length)
{
auto cryptoDigest = PAL::CryptoDigest::create(toCryptoDigestAlgorithm(algorithm));
cryptoDigest->addBytes(bytes, length);
std::optional<ResourceCryptographicDigest> decodeEncodedResourceCryptographicDigest(const EncodedResourceCryptographicDigest&);
-ResourceCryptographicDigest cryptographicDigestForBytes(ResourceCryptographicDigest::Algorithm, const char* bytes, size_t length);
+ResourceCryptographicDigest cryptographicDigestForBytes(ResourceCryptographicDigest::Algorithm, const void* bytes, size_t length);
}
/*
Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
+ Copyright (C) 2003-2017 Apple Inc. All rights reserved.
Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)
This library is free software; you can redistribute it and/or
#include "TextEncodingDetector.h"
#include "TextEncodingRegistry.h"
#include <wtf/ASCIICType.h>
-#include <wtf/StringExtras.h>
namespace WebCore {
return specifiedDefaultEncoding;
}
-TextResourceDecoder::TextResourceDecoder(const String& mimeType, const TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector)
+inline TextResourceDecoder::TextResourceDecoder(const String& mimeType, const TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector)
: m_contentType(determineContentType(mimeType))
, m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding))
- , m_source(DefaultEncoding)
- , m_hintEncoding(nullptr)
- , m_checkedForBOM(false)
- , m_checkedForCSSCharset(false)
- , m_checkedForHeadCharset(false)
- , m_useLenientXMLDecoding(false)
- , m_sawError(false)
, m_usesEncodingDetector(usesEncodingDetector)
{
}
+// Factory function: allocates a TextResourceDecoder from the given arguments and
+// returns it through adoptRef().
+Ref<TextResourceDecoder> TextResourceDecoder::create(const String& mimeType, const TextEncoding& defaultEncoding, bool usesEncodingDetector)
+{
+ return adoptRef(*new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
+}
+
TextResourceDecoder::~TextResourceDecoder() = default;
void TextResourceDecoder::setEncoding(const TextEncoding& encoding, EncodingSource source)
// When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR),
// treat x-user-defined as windows-1252 (bug 18270)
- if (source == EncodingFromMetaTag && strcasecmp(encoding.name(), "x-user-defined") == 0)
+ if (source == EncodingFromMetaTag && equalLettersIgnoringASCIICase(encoding.name(), "x-user-defined"))
m_encoding = "windows-1252";
else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || source == EncodingFromCSSCharset)
m_encoding = encoding.closestByteBasedEquivalent();
// in the first place.
bool TextResourceDecoder::shouldAutoDetect() const
{
- // Just checking m_hintEncoding suffices here because it's only set
- // in setHintEncoding when the source is AutoDetectedEncoding.
return m_usesEncodingDetector
- && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding));
+ && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_parentFrameAutoDetectedEncoding));
}
-String TextResourceDecoder::decode(const char* data, size_t len)
+String TextResourceDecoder::decode(const char* data, size_t length)
{
size_t lengthOfBOM = 0;
if (!m_checkedForBOM)
- lengthOfBOM = checkForBOM(data, len);
+ lengthOfBOM = checkForBOM(data, length);
bool movedDataToBuffer = false;
if (m_contentType == CSS && !m_checkedForCSSCharset)
- if (!checkForCSSCharset(data, len, movedDataToBuffer))
+ if (!checkForCSSCharset(data, length, movedDataToBuffer))
return emptyString();
if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForHeadCharset) // HTML and XML
- if (!checkForHeadCharset(data, len, movedDataToBuffer))
+ if (!checkForHeadCharset(data, length, movedDataToBuffer))
return emptyString();
// FIXME: It is wrong to change the encoding downstream after we have already done some decoding.
if (shouldAutoDetect()) {
if (m_encoding.isJapanese())
- detectJapaneseEncoding(data, len); // FIXME: We should use detectTextEncoding() for all languages.
+ detectJapaneseEncoding(data, length); // FIXME: We should use detectTextEncoding() for all languages.
else {
TextEncoding detectedEncoding;
- if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
+ if (detectTextEncoding(data, length, m_parentFrameAutoDetectedEncoding, &detectedEncoding))
setEncoding(detectedEncoding, AutoDetectedEncoding);
}
}
m_codec = newTextCodec(m_encoding);
if (m_buffer.isEmpty())
- return m_codec->decode(data + lengthOfBOM, len - lengthOfBOM, false, m_contentType == XML, m_sawError);
+ return m_codec->decode(data + lengthOfBOM, length - lengthOfBOM, false, m_contentType == XML, m_sawError);
if (!movedDataToBuffer) {
size_t oldSize = m_buffer.size();
- m_buffer.grow(oldSize + len);
- memcpy(m_buffer.data() + oldSize, data, len);
+ m_buffer.grow(oldSize + length);
+ memcpy(m_buffer.data() + oldSize, data, length);
}
String result = m_codec->decode(m_buffer.data() + lengthOfBOM, m_buffer.size() - lengthOfBOM, false, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError);
String TextResourceDecoder::flush()
{
- // If we can not identify the encoding even after a document is completely
- // loaded, we need to detect the encoding if other conditions for
- // autodetection is satisfied.
+ // If we cannot identify the encoding even after a document is completely
+ // loaded, we need to detect the encoding if the other conditions for
+ // autodetection are satisfied.
if (m_buffer.size() && shouldAutoDetect()
&& ((!m_checkedForHeadCharset && (m_contentType == HTML || m_contentType == XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) {
- TextEncoding detectedEncoding;
- if (detectTextEncoding(m_buffer.data(), m_buffer.size(),
- m_hintEncoding, &detectedEncoding))
- setEncoding(detectedEncoding, AutoDetectedEncoding);
+ TextEncoding detectedEncoding;
+ if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_parentFrameAutoDetectedEncoding, &detectedEncoding))
+ setEncoding(detectedEncoding, AutoDetectedEncoding);
}
if (!m_codec)
/*
Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
- Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
+ Copyright (C) 2006-2017 Apple Inc. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
namespace WebCore {
class HTMLMetaCharsetParser;
+class TextCodec;
class TextResourceDecoder : public RefCounted<TextResourceDecoder> {
public:
EncodingFromParentFrame
};
- static Ref<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding(), bool usesEncodingDetector = false)
- {
- return adoptRef(*new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
- }
+ WEBCORE_EXPORT static Ref<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = { }, bool usesEncodingDetector = false);
WEBCORE_EXPORT ~TextResourceDecoder();
void setEncoding(const TextEncoding&, EncodingSource);
const TextEncoding& encoding() const { return m_encoding; }
- bool hasEqualEncodingForCharset(const String&) const;
+ bool hasEqualEncodingForCharset(const String& charset) const;
WEBCORE_EXPORT String decode(const char* data, size_t length);
WEBCORE_EXPORT String flush();
WEBCORE_EXPORT String decodeAndFlush(const char* data, size_t length);
- void setHintEncoding(const TextResourceDecoder* hintDecoder)
- {
- // hintEncoding is for use with autodetection, which should be
- // only invoked when hintEncoding comes from auto-detection.
- if (hintDecoder && hintDecoder->m_source == AutoDetectedEncoding)
- m_hintEncoding = hintDecoder->encoding().name();
- }
+ void setHintEncoding(const TextResourceDecoder* parentFrameDecoder);
void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
bool sawError() const { return m_sawError; }
private:
- WEBCORE_EXPORT TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding, bool usesEncodingDetector);
+ TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding, bool usesEncodingDetector);
enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
static ContentType determineContentType(const String& mimeType);
ContentType m_contentType;
TextEncoding m_encoding;
std::unique_ptr<TextCodec> m_codec;
- EncodingSource m_source;
- const char* m_hintEncoding;
- Vector<char> m_buffer;
- bool m_checkedForBOM;
- bool m_checkedForCSSCharset;
- bool m_checkedForHeadCharset;
- bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
- bool m_sawError;
- bool m_usesEncodingDetector;
-
std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser;
+ EncodingSource m_source { DefaultEncoding };
+ const char* m_parentFrameAutoDetectedEncoding { nullptr };
+ Vector<char> m_buffer;
+ bool m_checkedForBOM { false };
+ bool m_checkedForCSSCharset { false };
+ bool m_checkedForHeadCharset { false };
+ bool m_useLenientXMLDecoding { false }; // Don't stop on XML decoding errors.
+ bool m_sawError { false };
+ bool m_usesEncodingDetector { false };
};
+// Remembers the parent frame decoder's encoding name, but only when that decoder's
+// encoding source is AutoDetectedEncoding. A null decoder, or one whose encoding came
+// from any other source, leaves the stored hint untouched.
+// NOTE(review): this stores the raw pointer returned by encoding().name(); it presumably
+// refers to storage that outlives this decoder — confirm.
+inline void TextResourceDecoder::setHintEncoding(const TextResourceDecoder* parentFrameDecoder)
+{
+ if (parentFrameDecoder && parentFrameDecoder->m_source == AutoDetectedEncoding)
+ m_parentFrameAutoDetectedEncoding = parentFrameDecoder->encoding().name();
+}
+
} // namespace WebCore
#include "SecurityOriginData.h"
#include "URL.h"
#include <wtf/StdLibExtras.h>
-#include <wtf/StringExtras.h>
#include <wtf/UUID.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
#include "Page.h"
#include "PageGroup.h"
#include <stdarg.h>
-#include <wtf/StringExtras.h>
#include <wtf/Vector.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
return;
}
String text = accumulator.serializeNodes(*document->documentElement(), IncludeNode);
- CString frameHTML = textEncoding.encode(text, EntitiesForUnencodables);
- m_resources.append({ url, document->suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length()) });
+ m_resources.append({ url, document->suggestedMIMEType(), SharedBuffer::create(textEncoding.encode(text, UnencodableHandling::Entities)) });
m_resourceURLs.add(url);
for (auto& node : nodes) {
// FIXME: We should check whether a charset has been specified and if none was found add one.
TextEncoding textEncoding(styleSheet->contents().charset());
ASSERT(textEncoding.isValid());
- String textString = cssText.toString();
- CString text = textEncoding.encode(textString, EntitiesForUnencodables);
- m_resources.append({ url, ASCIILiteral { "text/css" }, SharedBuffer::create(text.data(), text.length()) });
+ m_resources.append({ url, ASCIILiteral { "text/css" }, SharedBuffer::create(textEncoding.encode(cssText.toString(), UnencodableHandling::Entities)) });
m_resourceURLs.add(url);
}
}
// FIXME: Compute the digest with respect to the raw bytes received from the page.
// See <https://bugs.webkit.org/show_bug.cgi?id=155184>.
- CString contentCString = encodingToUse.encode(content, EntitiesForUnencodables);
+ auto encodedContent = encodingToUse.encode(content, UnencodableHandling::Entities);
bool foundHashInEnforcedPolicies = false;
bool foundHashInReportOnlyPolicies = false;
for (auto algorithm : algorithms) {
- ContentSecurityPolicyHash hash = cryptographicDigestForBytes(algorithm, contentCString.data(), contentCString.length());
+ ContentSecurityPolicyHash hash = cryptographicDigestForBytes(algorithm, encodedContent.data(), encodedContent.size());
if (!foundHashInEnforcedPolicies && allPoliciesWithDispositionAllow(ContentSecurityPolicy::Disposition::Enforce, std::forward<Predicate>(predicate), hash))
foundHashInEnforcedPolicies = true;
if (!foundHashInReportOnlyPolicies && allPoliciesWithDispositionAllow(ContentSecurityPolicy::Disposition::ReportOnly, std::forward<Predicate>(predicate), hash))
#include "config.h"
#include "FileHandle.h"
-#include <wtf/StringExtras.h>
-
namespace WebCore {
FileHandle::FileHandle(const String& path, FileSystem::FileOpenMode mode)
return adoptRef(*new SharedBuffer(WTFMove(vector)));
}
+// Creates a SharedBuffer from a vector of bytes by handing the vector's data pointer and
+// size to the SharedBuffer constructor; the vector's storage itself is not moved into the buffer.
+// FIXME: Move the whole class from Vector<char> to Vector<uint8_t> and make this efficient, replacing the Vector<char> version above.
+Ref<SharedBuffer> SharedBuffer::create(Vector<uint8_t>&& vector)
+{
+ return adoptRef(*new SharedBuffer { vector.data(), vector.size() });
+}
+
void SharedBuffer::combineIntoOneSegment() const
{
#if !ASSERT_DISABLED
static RefPtr<SharedBuffer> createWithContentsOfFile(const String& filePath);
static Ref<SharedBuffer> create(Vector<char>&&);
-
+ static Ref<SharedBuffer> create(Vector<uint8_t>&&);
+
#if USE(FOUNDATION)
RetainPtr<NSData> createNSData() const;
RetainPtr<NSArray> createNSDataArray() const;
void URLParser::encodeQuery(const Vector<UChar>& source, const TextEncoding& encoding, CodePointIterator<CharacterType> iterator)
{
// FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested.
- CString encoded = encoding.encode(StringView(source.data(), source.size()), URLEncodedEntitiesForUnencodables);
- const char* data = encoded.data();
- size_t length = encoded.length();
+ auto encoded = encoding.encode(StringView(source.data(), source.size()), UnencodableHandling::URLEncodedEntities);
+ auto* data = encoded.data();
+ size_t length = encoded.size();
if (!length == !iterator.atEnd()) {
syntaxViolation(iterator);
if (scheme == "clearkey") {
String keyID = [[[avRequest request] URL] resourceSpecifier];
- StringView keyIDView(keyID);
- CString utf8EncodedKeyId = UTF8Encoding().encode(keyIDView, URLEncodedEntitiesForUnencodables);
+ auto encodedKeyId = UTF8Encoding().encode(keyID, UnencodableHandling::URLEncodedEntities);
- RefPtr<Uint8Array> initData = Uint8Array::create(utf8EncodedKeyId.length());
- initData->setRange(reinterpret_cast<const JSC::Uint8Adaptor::Type*>(utf8EncodedKeyId.data()), utf8EncodedKeyId.length(), 0);
+ auto initData = Uint8Array::create(encodedKeyId.size());
+ initData->setRange(encodedKeyId.data(), encodedKeyId.size(), 0);
auto keyData = player()->cachedKeyForKeyId(keyID);
if (keyData) {
#include "PlatformCALayerClient.h"
#include <CoreFoundation/CoreFoundation.h>
#include <CoreText/CoreText.h>
-#include <wtf/StringExtras.h>
#include <wtf/text/TextStream.h>
#if PLATFORM(WIN)
Ref<FormData> FormData::create(const CString& string)
{
- auto result = create();
- result->appendData(string.data(), string.length());
- return result;
+ return create(string.data(), string.length());
}
Ref<FormData> FormData::create(const Vector<char>& vector)
{
- auto result = create();
- result->appendData(vector.data(), vector.size());
- return result;
+ return create(vector.data(), vector.size());
+}
+
+// Creates a FormData from the bytes of |vector| by forwarding its data pointer and size
+// to another create() overload.
+Ref<FormData> FormData::create(const Vector<uint8_t>& vector)
+{
+ return create(vector.data(), vector.size());
 }
Ref<FormData> FormData::create(const DOMFormData& formData, EncodingType encodingType)
Ref<FormData> FormData::isolatedCopy() const
{
- // FIXME: isolatedCopy() (historically deepCopy()) only copies certain values from `this`. Why is that?
+ // FIXME: isolatedCopy() does not copy m_identifier, m_boundary, or m_containsPasswordData.
+ // Is all of that correct and intentional?
+
auto formData = create();
formData->m_alwaysStream = m_alwaysStream;
m_lengthInBytes = std::nullopt;
}
-static CString normalizeStringData(TextEncoding& encoding, const String& value)
+static Vector<uint8_t> normalizeStringData(TextEncoding& encoding, const String& value)
{
- return normalizeLineEndingsToCRLF(encoding.encode(value, EntitiesForUnencodables));
+ return normalizeLineEndingsToCRLF(encoding.encode(value, UnencodableHandling::Entities));
}
void FormData::appendMultiPartFileValue(const File& file, Vector<char>& header, TextEncoding& encoding, Document* document)
appendData(header.data(), header.size());
auto normalizedStringData = normalizeStringData(encoding, string);
- appendData(normalizedStringData.data(), normalizedStringData.length());
+ appendData(normalizedStringData.data(), normalizedStringData.size());
}
void FormData::appendMultiPartKeyValuePairItems(const DOMFormData& formData, Document* document)
class SharedBuffer;
class TextEncoding;
+// FIXME: Convert this to a Variant of structs and remove "Type" and also the "m_" prefixes from the data members.
+// The member functions can become non-member functions.
class FormDataElement {
public:
- enum class Type {
- Data,
- EncodedFile,
- EncodedBlob,
- };
+ enum class Type { Data, EncodedFile, EncodedBlob };
FormDataElement()
: m_type(Type::Data)
FormDataElement isolatedCopy() const;
- template<typename Encoder>
- void encode(Encoder&) const;
- template<typename Decoder>
- static std::optional<FormDataElement> decode(Decoder&);
+ template<typename Encoder> void encode(Encoder&) const;
+ template<typename Decoder> static std::optional<FormDataElement> decode(Decoder&);
Type m_type;
Vector<char> m_data;
int64_t m_fileLength;
double m_expectedFileModificationTime;
// FIXME: Generated file support in FormData is almost identical to Blob, they should be merged.
- // We can't just switch to using Blobs for all files for two reasons:
- // 1. Not all platforms enable BLOB support.
- // 2. EncodedFile form data elements do not have a valid m_expectedFileModificationTime, meaning that we always upload the latest content from disk.
+ // We can't just switch to using Blobs for all files because EncodedFile form data elements do not
+ // have a valid m_expectedFileModificationTime, meaning we always upload the latest content from disk.
String m_generatedFilename;
bool m_shouldGenerateFile;
bool m_ownsGeneratedFile;
return !(a == b);
}
-
template<typename Encoder>
void FormDataElement::encode(Encoder& encoder) const
{
WEBCORE_EXPORT static Ref<FormData> create(const void*, size_t);
static Ref<FormData> create(const CString&);
static Ref<FormData> create(const Vector<char>&);
+ static Ref<FormData> create(const Vector<uint8_t>&);
static Ref<FormData> create(const DOMFormData&, EncodingType = FormURLEncoded);
static Ref<FormData> createMultiPart(const DOMFormData&, Document*);
WEBCORE_EXPORT ~FormData();
encoder << m_boundary;
encoder << m_elements;
encoder << m_identifier;
+ // FIXME: Does not encode m_containsPasswordData. Why is that OK?
}
template<typename Decoder>
* Copyright (C) 1999 Lars Knoll (knoll@kde.org)
* (C) 1999 Antti Koivisto (koivisto@kde.org)
* (C) 2001 Dirk Mueller (mueller@kde.org)
- * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
* (C) 2006 Alexey Proskuryakov (ap@nypop.com)
* Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
*
namespace WebCore {
-// Helper functions
+namespace FormDataBuilder {
+
static inline void append(Vector<char>& buffer, char string)
{
buffer.append(string);
buffer.append(string.data(), string.length());
}
-static void appendQuotedString(Vector<char>& buffer, const CString& string)
+// Appends every byte of |string| to |buffer| unchanged (no escaping).
+static inline void append(Vector<char>& buffer, const Vector<uint8_t>& string)
+{
+ buffer.appendVector(string);
+}
+
+static void appendQuoted(Vector<char>& buffer, const Vector<uint8_t>& string)
{
// Append a string as a quoted value, escaping quotes and line breaks.
- // FIXME: Is it correct to use percent escaping here? Other browsers do not encode these characters yet,
- // so we should test popular servers to find out if there is an encoding form they can handle.
- size_t length = string.length();
- for (size_t i = 0; i < length; ++i) {
- char c = string.data()[i];
- switch (c) {
+ // FIXME: Is it correct to use percent escaping here? When this code was originally written,
+ // other browsers were not encoding these characters, so someone should test servers or do
+ // research to find out if there is an encoding form that works well.
+ // FIXME: If we want to use percent escaping sensibly, we need to escape "%" characters too.
+ size_t size = string.size();
+ for (size_t i = 0; i < size; ++i) {
+ auto character = string[i];
+ switch (character) {
case 0xA:
append(buffer, "%0A");
break;
append(buffer, "%22");
break;
default:
- append(buffer, c);
+ append(buffer, character);
+ }
+ }
+}
+
+// Appends the application/x-www-form-urlencoded serialization of the |length| bytes
+// starting at |string| to |buffer|.
+// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer
+static void appendFormURLEncoded(Vector<char>& buffer, const uint8_t* string, size_t length)
+{
+ static const char safeCharacters[] = "-._*";
+ for (size_t i = 0; i < length; ++i) {
+ auto character = string[i];
+ // strchr() treats the terminating null of safeCharacters as part of the string, so a 0x00
+ // byte must be excluded explicitly or it would be appended raw instead of percent-escaped.
+ if (isASCIIAlphanumeric(character) || (character && strchr(safeCharacters, character)))
+ append(buffer, character);
+ else if (character == ' ')
+ append(buffer, '+');
+ // A lone CR, a lone LF, and a CR LF pair each produce a single "%0D%0A": a CR that is
+ // followed by LF appends nothing and the LF then emits the pair.
+ else if (character == '\n' || (character == '\r' && (i + 1 >= length || string[i + 1] != '\n')))
+ append(buffer, "%0D%0A"); // FIXME: Unclear exactly where this rule about normalizing line endings to CRLF comes from.
+ else if (character != '\r') {
+ append(buffer, '%');
+ appendByteAsHex(character, buffer);
}
}
}
-Vector<char> FormDataBuilder::generateUniqueBoundaryString()
+// Convenience overload: form-URL-encodes all bytes of |string| into |buffer| by forwarding
+// to the pointer/length version.
+static void appendFormURLEncoded(Vector<char>& buffer, const Vector<uint8_t>& string)
+{
+ appendFormURLEncoded(buffer, string.data(), string.size());
+}
+
+Vector<char> generateUniqueBoundaryString()
{
Vector<char> boundary;
return boundary;
}
-void FormDataBuilder::beginMultiPartHeader(Vector<char>& buffer, const CString& boundary, const CString& name)
+void beginMultiPartHeader(Vector<char>& buffer, const CString& boundary, const Vector<uint8_t>& name)
{
addBoundaryToMultiPartHeader(buffer, boundary);
// FIXME: This loses data irreversibly if the input name includes characters you can't encode
// in the website's character set.
append(buffer, "Content-Disposition: form-data; name=\"");
- appendQuotedString(buffer, name);
+ appendQuoted(buffer, name);
append(buffer, '"');
}
-void FormDataBuilder::addBoundaryToMultiPartHeader(Vector<char>& buffer, const CString& boundary, bool isLastBoundary)
+void addBoundaryToMultiPartHeader(Vector<char>& buffer, const CString& boundary, bool isLastBoundary)
{
append(buffer, "--");
append(buffer, boundary);
append(buffer, "\r\n");
}
-void FormDataBuilder::addFilenameToMultiPartHeader(Vector<char>& buffer, const TextEncoding& encoding, const String& filename)
+void addFilenameToMultiPartHeader(Vector<char>& buffer, const TextEncoding& encoding, const String& filename)
{
// FIXME: This loses data irreversibly if the filename includes characters you can't encode
// in the website's character set.
append(buffer, "; filename=\"");
- appendQuotedString(buffer, encoding.encode(filename, QuestionMarksForUnencodables));
+ appendQuoted(buffer, encoding.encode(filename, UnencodableHandling::QuestionMarks));
append(buffer, '"');
}
-void FormDataBuilder::addContentTypeToMultiPartHeader(Vector<char>& buffer, const CString& mimeType)
+void addContentTypeToMultiPartHeader(Vector<char>& buffer, const CString& mimeType)
{
ASSERT(Blob::isNormalizedContentType(mimeType));
append(buffer, "\r\nContent-Type: ");
append(buffer, mimeType);
}
-void FormDataBuilder::finishMultiPartHeader(Vector<char>& buffer)
+void finishMultiPartHeader(Vector<char>& buffer)
{
append(buffer, "\r\n\r\n");
}
-void FormDataBuilder::addKeyValuePairAsFormData(Vector<char>& buffer, const CString& key, const CString& value, FormData::EncodingType encodingType)
+void addKeyValuePairAsFormData(Vector<char>& buffer, const Vector<uint8_t>& key, const Vector<uint8_t>& value, FormData::EncodingType encodingType)
{
if (encodingType == FormData::TextPlain) {
if (!buffer.isEmpty())
} else {
if (!buffer.isEmpty())
append(buffer, '&');
- encodeStringAsFormData(buffer, key);
+ appendFormURLEncoded(buffer, key);
append(buffer, '=');
- encodeStringAsFormData(buffer, value);
+ appendFormURLEncoded(buffer, value);
}
}
-void FormDataBuilder::encodeStringAsFormData(Vector<char>& buffer, const CString& string)
+void encodeStringAsFormData(Vector<char>& buffer, const CString& string)
{
- // Same safe characters as Netscape for compatibility.
- static const char safeCharacters[] = "-._*";
-
- // http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1
- unsigned length = string.length();
- for (unsigned i = 0; i < length; ++i) {
- unsigned char c = string.data()[i];
+ appendFormURLEncoded(buffer, reinterpret_cast<const uint8_t*>(string.data()), string.length());
+}
- if (isASCIIAlphanumeric(c) || strchr(safeCharacters, c))
- append(buffer, c);
- else if (c == ' ')
- append(buffer, '+');
- else if (c == '\n' || (c == '\r' && (i + 1 >= length || string.data()[i + 1] != '\n')))
- append(buffer, "%0D%0A");
- else if (c != '\r') {
- append(buffer, '%');
- appendByteAsHex(c, buffer);
- }
- }
}
}
/*
* Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
*
*/
-#ifndef FormDataBuilder_h
-#define FormDataBuilder_h
+#pragma once
#include "FormData.h"
#include <wtf/Forward.h>
class TextEncoding;
-class FormDataBuilder {
- WTF_MAKE_NONCOPYABLE(FormDataBuilder);
-public:
- // Helper functions used by HTMLFormElement for multi-part form data
- static Vector<char> generateUniqueBoundaryString();
- static void beginMultiPartHeader(Vector<char>&, const CString& boundary, const CString& name);
- static void addBoundaryToMultiPartHeader(Vector<char>&, const CString& boundary, bool isLastBoundary = false);
- static void addFilenameToMultiPartHeader(Vector<char>&, const TextEncoding&, const String& filename);
- static void addContentTypeToMultiPartHeader(Vector<char>&, const CString& mimeType);
- static void finishMultiPartHeader(Vector<char>&);
-
- // Helper functions used by HTMLFormElement for non multi-part form data
- static void addKeyValuePairAsFormData(Vector<char>&, const CString& key, const CString& value, FormData::EncodingType = FormData::FormURLEncoded);
- static void encodeStringAsFormData(Vector<char>&, const CString&);
-
-private:
- FormDataBuilder() {}
-};
+namespace FormDataBuilder {
+
+// Helper functions used by HTMLFormElement for multi-part form data.
+Vector<char> generateUniqueBoundaryString();
+void beginMultiPartHeader(Vector<char>&, const CString& boundary, const Vector<uint8_t>& name);
+void addBoundaryToMultiPartHeader(Vector<char>&, const CString& boundary, bool isLastBoundary = false);
+void addFilenameToMultiPartHeader(Vector<char>&, const TextEncoding&, const String& filename);
+void addContentTypeToMultiPartHeader(Vector<char>&, const CString& mimeType);
+void finishMultiPartHeader(Vector<char>&);
+
+// Helper functions used by HTMLFormElement for non-multi-part form data.
+void addKeyValuePairAsFormData(Vector<char>&, const Vector<uint8_t>& key, const Vector<uint8_t>& value, FormData::EncodingType = FormData::FormURLEncoded);
+void encodeStringAsFormData(Vector<char>&, const CString&);
}
-#endif
+}
// didReceiveResponse might cause the client to be deleted.
if (m_handle->client()) {
- CString encodedData = encoding.encode(data, URLEncodedEntitiesForUnencodables);
- if (encodedData.length())
- m_handle->client()->didReceiveBuffer(m_handle, SharedBuffer::create(encodedData.data(), encodedData.length()), originalSize);
+ auto encodedData = encoding.encode(data, UnencodableHandling::URLEncodedEntities);
+ if (encodedData.size())
+ m_handle->client()->didReceiveBuffer(m_handle, SharedBuffer::create(WTFMove(encodedData)), originalSize);
}
}
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef DecodeEscapeSequences_h
-#define DecodeEscapeSequences_h
+#pragma once
#include "TextEncoding.h"
#include <wtf/ASCIICType.h>
return result.toString();
}
-inline Vector<char> decodeURLEscapeSequencesAsData(StringView string, const TextEncoding& encoding)
+inline Vector<uint8_t> decodeURLEscapeSequencesAsData(StringView string, const TextEncoding& encoding)
{
ASSERT(encoding.isValid());
- Vector<char> result;
+ Vector<uint8_t> result;
size_t decodedPosition = 0;
size_t searchPosition = 0;
while (true) {
continue;
}
}
+
// Strings are encoded as requested.
- auto stringFragment = string.substring(decodedPosition, encodedRunPosition - decodedPosition);
- auto encodedStringFragment = encoding.encode(stringFragment, URLEncodedEntitiesForUnencodables);
- result.append(encodedStringFragment.data(), encodedStringFragment.length());
+ result.appendVector(encoding.encode(string.substring(decodedPosition, encodedRunPosition - decodedPosition), UnencodableHandling::URLEncodedEntities));
if (encodedRunPosition == notFound)
return result;
} // namespace WebCore
-#endif // DecodeEscapeSequences_h
/*
- * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
*
* Redistribution and use in source and binary forms, with or without
#include "config.h"
#include "TextCodec.h"
-#include <wtf/StringExtras.h>
-#include <wtf/text/WTFString.h>
+#include <array>
namespace WebCore {
-TextCodec::~TextCodec() = default;
-
-int TextCodec::getUnencodableReplacement(unsigned codePoint, UnencodableHandling handling, UnencodableReplacementArray replacement)
+int TextCodec::getUnencodableReplacement(UChar32 codePoint, UnencodableHandling handling, UnencodableReplacementArray& replacement)
{
switch (handling) {
- case QuestionMarksForUnencodables:
- replacement[0] = '?';
- replacement[1] = 0;
- return 1;
- case EntitiesForUnencodables:
- snprintf(replacement, sizeof(UnencodableReplacementArray), "&#%u;", codePoint);
- return static_cast<int>(strlen(replacement));
- case URLEncodedEntitiesForUnencodables:
- snprintf(replacement, sizeof(UnencodableReplacementArray), "%%26%%23%u%%3B", codePoint);
- return static_cast<int>(strlen(replacement));
+ case UnencodableHandling::QuestionMarks:
+ replacement.data()[0] = '?';
+ replacement.data()[1] = 0;
+ return 1;
+ case UnencodableHandling::Entities:
+ return snprintf(replacement.data(), sizeof(UnencodableReplacementArray), "&#%u;", codePoint);
+ case UnencodableHandling::URLEncodedEntities:
+ return snprintf(replacement.data(), sizeof(UnencodableReplacementArray), "%%26%%23%u%%3B", codePoint);
}
ASSERT_NOT_REACHED();
- replacement[0] = 0;
+ replacement.data()[0] = 0;
return 0;
}
/*
- * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
*
* Redistribution and use in source and binary forms, with or without
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextCodec_h
-#define TextCodec_h
+#pragma once
#include <memory>
+#include <pal/text/UnencodableHandling.h>
+#include <unicode/umachine.h>
#include <wtf/Forward.h>
#include <wtf/Noncopyable.h>
-#include <wtf/text/WTFString.h>
namespace WebCore {
- class TextEncoding;
- // Specifies what will happen when a character is encountered that is
- // not encodable in the character set.
- enum UnencodableHandling {
- // Substitutes the replacement character "?".
- QuestionMarksForUnencodables,
+class TextEncoding;
- // Encodes the character as an XML entity. For example, U+06DE
- // would be "۞" (0x6DE = 1758 in octal).
- EntitiesForUnencodables,
+using UnencodableReplacementArray = std::array<char, 32>;
- // Encodes the character as en entity as above, but escaped
- // non-alphanumeric characters. This is used in URLs.
- // For example, U+6DE would be "%26%231758%3B".
- URLEncodedEntitiesForUnencodables
- };
+class TextCodec {
+ WTF_MAKE_NONCOPYABLE(TextCodec); WTF_MAKE_FAST_ALLOCATED;
+public:
+ TextCodec() = default;
+ virtual ~TextCodec() = default;
- typedef char UnencodableReplacementArray[32];
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0;
+ virtual Vector<uint8_t> encode(StringView, UnencodableHandling) = 0;
- class TextCodec {
- WTF_MAKE_NONCOPYABLE(TextCodec); WTF_MAKE_FAST_ALLOCATED;
- public:
- TextCodec() { }
- virtual ~TextCodec();
+ // Fills a null-terminated string representation of the given
+ // unencodable character into the given replacement buffer.
+ // The length of the string (not including the null) will be returned.
+ static int getUnencodableReplacement(UChar32, UnencodableHandling, UnencodableReplacementArray&);
+};
- String decode(const char* str, size_t length, bool flush = false)
- {
- bool ignored;
- return decode(str, length, flush, false, ignored);
- }
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0;
- virtual CString encode(const UChar*, size_t length, UnencodableHandling) = 0;
+using EncodingNameRegistrar = void (*)(const char* alias, const char* name);
- // Fills a null-terminated string representation of the given
- // unencodable character into the given replacement buffer.
- // The length of the string (not including the null) will be returned.
- static int getUnencodableReplacement(unsigned codePoint, UnencodableHandling, UnencodableReplacementArray);
- };
-
- typedef void (*EncodingNameRegistrar)(const char* alias, const char* name);
-
- typedef std::unique_ptr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData);
- typedef void (*TextCodecRegistrar)(const char* name, NewTextCodecFunction, const void* additionalData);
+using NewTextCodecFunction = WTF::Function<std::unique_ptr<TextCodec>()>;
+using TextCodecRegistrar = void (*)(const char* name, NewTextCodecFunction&&);
} // namespace WebCore
-#endif // TextCodec_h
/*
- * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
*
* Redistribution and use in source and binary forms, with or without
#include "TextEncoding.h"
#include "TextEncodingRegistry.h"
#include "ThreadGlobalData.h"
-#include <unicode/ucnv.h>
+#include <array>
#include <unicode/ucnv_cb.h>
-#include <wtf/Assertions.h>
-#include <wtf/StringExtras.h>
#include <wtf/Threading.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
const size_t ConversionBufferSize = 16384;
-ICUConverterWrapper::~ICUConverterWrapper()
-{
- if (converter)
- ucnv_close(converter);
-}
-
-static UConverter*& cachedConverterICU()
-{
- return threadGlobalData().cachedConverterICU().converter;
-}
-
-std::unique_ptr<TextCodec> TextCodecICU::create(const TextEncoding& encoding, const void* additionalData)
-{
- // Name strings are persistently kept in TextEncodingRegistry maps, so they are never deleted.
- return std::make_unique<TextCodecICU>(encoding.name(), static_cast<const char*>(additionalData));
-}
-
#define DECLARE_ALIASES(encoding, ...) \
static const char* const encoding##_aliases[] { __VA_ARGS__ }
void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)
{
for (auto& encodingName : encodingNames) {
+ const char* name = encodingName.name;
+
// These encodings currently don't have standard names, so we need to register encoders manually.
// http://demo.icu-project.org/icu-bin/convexp
- if (!strcmp(encodingName.name, "windows-874")) {
- registrar(encodingName.name, create, "windows-874-2000");
+ if (!strcmp(name, "windows-874")) {
+ registrar(name, [name] {
+ return std::make_unique<TextCodecICU>(name, "windows-874-2000");
+ });
continue;
}
- if (!strcmp(encodingName.name, "windows-949")) {
- registrar(encodingName.name, create, "windows-949-2000");
+ if (!strcmp(name, "windows-949")) {
+ registrar(name, [name] {
+ return std::make_unique<TextCodecICU>(name, "windows-949-2000");
+ });
continue;
}
- if (!strcmp(encodingName.name, "x-mac-cyrillic")) {
- registrar(encodingName.name, create, "macos-7_3-10.2");
+ if (!strcmp(name, "x-mac-cyrillic")) {
+ registrar(name, [name] {
+ return std::make_unique<TextCodecICU>(name, "macos-7_3-10.2");
+ });
continue;
}
- if (!strcmp(encodingName.name, "x-mac-greek")) {
- registrar(encodingName.name, create, "macos-6_2-10.4");
+ if (!strcmp(name, "x-mac-greek")) {
+ registrar(name, [name] {
+ return std::make_unique<TextCodecICU>(name, "macos-6_2-10.4");
+ });
continue;
}
- if (!strcmp(encodingName.name, "x-mac-centraleurroman")) {
- registrar(encodingName.name, create, "macos-29-10.2");
+ if (!strcmp(name, "x-mac-centraleurroman")) {
+ registrar(name, [name] {
+ return std::make_unique<TextCodecICU>(name, "macos-29-10.2");
+ });
continue;
}
- if (!strcmp(encodingName.name, "x-mac-turkish")) {
- registrar(encodingName.name, create, "macos-35-10.2");
+ if (!strcmp(name, "x-mac-turkish")) {
+ registrar(name, [name] {
+ return std::make_unique<TextCodecICU>(name, "macos-35-10.2");
+ });
continue;
}
- if (!strcmp(encodingName.name, "EUC-KR")) {
- registrar(encodingName.name, create, "windows-949");
+ if (!strcmp(name, "EUC-KR")) {
+ registrar(name, [name] {
+ return std::make_unique<TextCodecICU>(name, "windows-949");
+ });
continue;
}
UErrorCode error = U_ZERO_ERROR;
- const char* canonicalConverterName = ucnv_getCanonicalName(encodingName.name, "IANA", &error);
+ const char* canonicalConverterName = ucnv_getCanonicalName(name, "IANA", &error);
ASSERT(U_SUCCESS(error));
- registrar(encodingName.name, create, canonicalConverterName);
+ registrar(name, [name, canonicalConverterName] {
+ return std::make_unique<TextCodecICU>(name, canonicalConverterName);
+ });
}
}
TextCodecICU::TextCodecICU(const char* encoding, const char* canonicalConverterName)
: m_encodingName(encoding)
, m_canonicalConverterName(canonicalConverterName)
- , m_converterICU(0)
- , m_needsGBKFallbacks(false)
{
}
TextCodecICU::~TextCodecICU()
{
- releaseICUConverter();
-}
-
-void TextCodecICU::releaseICUConverter() const
-{
- if (m_converterICU) {
- UConverter*& cachedConverter = cachedConverterICU();
- if (cachedConverter)
- ucnv_close(cachedConverter);
- ucnv_reset(m_converterICU);
- cachedConverter = m_converterICU;
- m_converterICU = 0;
+ if (m_converter) {
+ ucnv_reset(m_converter.get());
+ threadGlobalData().cachedConverterICU().converter = WTFMove(m_converter);
}
}
void TextCodecICU::createICUConverter() const
{
- ASSERT(!m_converterICU);
-
- UErrorCode err;
+ ASSERT(!m_converter);
m_needsGBKFallbacks = !strcmp(m_encodingName, "GBK");
- UConverter*& cachedConverter = cachedConverterICU();
+ auto& cachedConverter = threadGlobalData().cachedConverterICU().converter;
if (cachedConverter) {
- err = U_ZERO_ERROR;
- const char* cachedConverterName = ucnv_getName(cachedConverter, &err);
- if (U_SUCCESS(err) && !strcmp(m_canonicalConverterName, cachedConverterName)) {
- m_converterICU = cachedConverter;
- cachedConverter = 0;
+ UErrorCode error = U_ZERO_ERROR;
+ const char* cachedConverterName = ucnv_getName(cachedConverter.get(), &error);
+ if (U_SUCCESS(error) && !strcmp(m_canonicalConverterName, cachedConverterName)) {
+ m_converter = WTFMove(cachedConverter);
return;
}
}
- err = U_ZERO_ERROR;
- m_converterICU = ucnv_open(m_canonicalConverterName, &err);
- ASSERT(U_SUCCESS(err));
- if (m_converterICU)
- ucnv_setFallback(m_converterICU, TRUE);
+ UErrorCode error = U_ZERO_ERROR;
+ m_converter = ICUConverterPtr { ucnv_open(m_canonicalConverterName, &error), ucnv_close };
+ if (m_converter)
+ ucnv_setFallback(m_converter.get(), TRUE);
}
-int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err)
+int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& error)
{
UChar* targetStart = target;
- err = U_ZERO_ERROR;
- ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
+ error = U_ZERO_ERROR;
+ ucnv_toUnicode(m_converter.get(), &target, targetLimit, &source, sourceLimit, offsets, flush, &error);
return target - targetStart;
}
class ErrorCallbackSetter {
public:
- ErrorCallbackSetter(UConverter* converter, bool stopOnError)
+ ErrorCallbackSetter(UConverter& converter, bool stopOnError)
: m_converter(converter)
, m_shouldStopOnEncodingErrors(stopOnError)
{
if (m_shouldStopOnEncodingErrors) {
UErrorCode err = U_ZERO_ERROR;
- ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,
- UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,
- &m_savedContext, &err);
+ ucnv_setToUCallBack(&m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE, UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction, &m_savedContext, &err);
ASSERT(err == U_ZERO_ERROR);
}
}
UErrorCode err = U_ZERO_ERROR;
const void* oldContext;
UConverterToUCallback oldAction;
- ucnv_setToUCallBack(m_converter, m_savedAction,
- m_savedContext, &oldAction,
- &oldContext, &err);
+ ucnv_setToUCallBack(&m_converter, m_savedAction, m_savedContext, &oldAction, &oldContext, &err);
ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);
ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_ON_ILLEGAL));
ASSERT(err == U_ZERO_ERROR);
}
private:
- UConverter* m_converter;
+ UConverter& m_converter;
bool m_shouldStopOnEncodingErrors;
const void* m_savedContext;
UConverterToUCallback m_savedAction;
String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
{
// Get a converter for the passed-in encoding.
- if (!m_converterICU) {
+ if (!m_converter) {
createICUConverter();
- ASSERT(m_converterICU);
- if (!m_converterICU) {
+ if (!m_converter) {
LOG_ERROR("error creating ICU encoder even though encoding was in table");
- return String();
+ sawError = true;
+ return { };
}
}
- ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);
+ ErrorCallbackSetter callbackSetter(*m_converter, stopOnError);
StringBuilder result;
String resultString = result.toString();
- // <http://bugs.webkit.org/show_bug.cgi?id=17014>
// Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
- // FIXME: strcasecmp is locale sensitive, we should not be using it.
- if (strcmp(m_encodingName, "GBK") == 0 || strcasecmp(m_encodingName, "gb18030") == 0)
+ if (!strcmp(m_encodingName, "GBK") || equalLettersIgnoringASCIICase(m_encodingName, "gb18030"))
resultString.replace(0xE5E5, ideographicSpace);
return resultString;
}
// We need to apply these fallbacks ourselves as they are not currently supported by ICU and
-// they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>.
+// they were provided by the Mac TEC encoding path. Needed to fix <rdar://problem/4708689>.
static UChar fallbackForGBK(UChar32 character)
{
switch (character) {
// Invalid character handler when writing escaped entities for unrepresentable
// characters. See UnencodableHandling::URLEncodedEntities for more.
static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
- UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+ UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* error)
{
if (reason == UCNV_UNASSIGNED) {
- *err = U_ZERO_ERROR;
-
+ *error = U_ZERO_ERROR;
UnencodableReplacementArray entity;
- int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncodedEntitiesForUnencodables, entity);
- ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err);
+ int entityLen = TextCodec::getUnencodableReplacement(codePoint, UnencodableHandling::URLEncodedEntities, entity);
+ ucnv_cbFromUWriteBytes(fromUArgs, entity.data(), entityLen, 0, error);
} else
- UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+ UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, error);
}
// Substitutes special GBK characters, escaping all other unassigned entities.
static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
- UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+ UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* error)
{
UChar outChar;
if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {
const UChar* source = &outChar;
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
+ *error = U_ZERO_ERROR;
+ ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, error);
return;
}
- UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+ UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, error);
}
// Combines both urlEscapedEntityCallback and GBK character substitution.
static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
- UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+ UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* error)
{
if (reason == UCNV_UNASSIGNED) {
if (UChar outChar = fallbackForGBK(codePoint)) {
const UChar* source = &outChar;
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
+ *error = U_ZERO_ERROR;
+ ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, error);
return;
}
- urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+ urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, error);
return;
}
- UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+ UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, error);
}
static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
- UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+ UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* error)
{
UChar outChar;
if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {
const UChar* source = &outChar;
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
+ *error = U_ZERO_ERROR;
+ ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, error);
return;
}
- UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+ UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, error);
}
-CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+Vector<uint8_t> TextCodecICU::encode(StringView string, UnencodableHandling handling)
{
- if (!length)
- return "";
+ if (string.isEmpty())
+ return { };
- if (!m_converterICU)
+ if (!m_converter) {
createICUConverter();
- if (!m_converterICU)
- return CString();
+ if (!m_converter)
+ return { };
+ }
// FIXME: We should see if there is a "force ASCII range" mode in ICU;
// until then, we change the backslash into a yen sign.
// Encoding will change the yen sign back into a backslash.
- Vector<UChar> copy;
- const UChar* source = characters;
+ String copy;
if (shouldShowBackslashAsCurrencySymbolIn(m_encodingName)) {
- for (size_t i = 0; i < length; ++i) {
- if (characters[i] == '\\') {
- copy.reserveInitialCapacity(length);
- for (size_t j = 0; j < i; ++j)
- copy.uncheckedAppend(characters[j]);
- for (size_t j = i; j < length; ++j) {
- UChar character = characters[j];
- if (character == '\\')
- character = yenSign;
- copy.uncheckedAppend(character);
- }
- source = copy.data();
- break;
- }
- }
+ copy = string.toStringWithoutCopying();
+ copy.replace('\\', yenSign);
+ string = copy;
}
- const UChar* sourceLimit = source + length;
-
- UErrorCode err = U_ZERO_ERROR;
+ UErrorCode error;
switch (handling) {
- case QuestionMarksForUnencodables:
- ucnv_setSubstChars(m_converterICU, "?", 1, &err);
- ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
- break;
- case EntitiesForUnencodables:
- ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
- break;
- case URLEncodedEntitiesForUnencodables:
- ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
- break;
+ case UnencodableHandling::QuestionMarks:
+ error = U_ZERO_ERROR;
+ ucnv_setSubstChars(m_converter.get(), "?", 1, &error);
+ if (U_FAILURE(error))
+ return { };
+ error = U_ZERO_ERROR;
+ ucnv_setFromUCallBack(m_converter.get(), m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &error);
+ if (U_FAILURE(error))
+ return { };
+ break;
+ case UnencodableHandling::Entities:
+ error = U_ZERO_ERROR;
+ ucnv_setFromUCallBack(m_converter.get(), m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &error);
+ if (U_FAILURE(error))
+ return { };
+ break;
+ case UnencodableHandling::URLEncodedEntities:
+ error = U_ZERO_ERROR;
+ ucnv_setFromUCallBack(m_converter.get(), m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &error);
+ if (U_FAILURE(error))
+ return { };
+ break;
}
- ASSERT(U_SUCCESS(err));
- if (U_FAILURE(err))
- return CString();
+ auto upconvertedCharacters = string.upconvertedCharacters();
+ auto* source = upconvertedCharacters.get();
+ auto* sourceLimit = source + string.length();
- Vector<char> result;
- size_t size = 0;
+ Vector<uint8_t> result;
do {
char buffer[ConversionBufferSize];
char* target = buffer;
char* targetLimit = target + ConversionBufferSize;
- err = U_ZERO_ERROR;
- ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err);
- size_t count = target - buffer;
- result.grow(size + count);
- memcpy(result.data() + size, buffer, count);
- size += count;
- } while (err == U_BUFFER_OVERFLOW_ERROR);
-
- return CString(result.data(), size);
+ error = U_ZERO_ERROR;
+ ucnv_fromUnicode(m_converter.get(), &target, targetLimit, &source, sourceLimit, 0, true, &error);
+ result.append(reinterpret_cast<uint8_t*>(buffer), target - buffer);
+ } while (error == U_BUFFER_OVERFLOW_ERROR);
+ return result;
}
} // namespace WebCore
/*
- * Copyright (C) 2004, 2006, 2007, 2011 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
*
* Redistribution and use in source and binary forms, with or without
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextCodecICU_h
-#define TextCodecICU_h
+#pragma once
#include "TextCodec.h"
-#include <unicode/utypes.h>
-
-typedef struct UConverter UConverter;
+#include <unicode/ucnv.h>
namespace WebCore {
- class TextCodecICU : public TextCodec {
- public:
- static std::unique_ptr<TextCodec> create(const TextEncoding&, const void* additionalData);
+using ICUConverterPtr = std::unique_ptr<UConverter, void (*)(UConverter*)>;
- TextCodecICU(const char* encoding, const char* canonicalConverterName);
+class TextCodecICU : public TextCodec {
+public:
+ TextCodecICU(const char* encoding, const char* canonicalConverterName);
+ virtual ~TextCodecICU();
- static void registerEncodingNames(EncodingNameRegistrar);
- static void registerCodecs(TextCodecRegistrar);
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
- virtual ~TextCodecICU();
+private:
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) final;
+ Vector<uint8_t> encode(StringView, UnencodableHandling) final;
- private:
- String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
- CString encode(const UChar*, size_t length, UnencodableHandling) override;
+ void createICUConverter() const;
+ void releaseICUConverter() const;
+ bool needsGBKFallbacks() const { return m_needsGBKFallbacks; }
+ void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; }
- void createICUConverter() const;
- void releaseICUConverter() const;
- bool needsGBKFallbacks() const { return m_needsGBKFallbacks; }
- void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; }
-
- int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source,
- const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err);
+ int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode&);
- const char* const m_encodingName;
- const char* const m_canonicalConverterName;
- mutable UConverter* m_converterICU;
- mutable bool m_needsGBKFallbacks;
- };
+ const char* const m_encodingName;
+ const char* const m_canonicalConverterName;
+ mutable ICUConverterPtr m_converter { nullptr, ucnv_close };
+ mutable bool m_needsGBKFallbacks { false };
+};
- struct ICUConverterWrapper {
- WTF_MAKE_NONCOPYABLE(ICUConverterWrapper); WTF_MAKE_FAST_ALLOCATED;
- public:
- ICUConverterWrapper() : converter(0) { }
- ~ICUConverterWrapper();
+struct ICUConverterWrapper {
+ ICUConverterPtr converter { nullptr, ucnv_close };
- UConverter* converter;
- };
+ WTF_MAKE_FAST_ALLOCATED;
+};
} // namespace WebCore
-#endif // TextCodecICU_h
/*
- * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#include "TextCodecLatin1.h"
#include "TextCodecASCIIFastPath.h"
+#include <array>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuffer.h>
#include <wtf/text/WTFString.h>
namespace WebCore {
using namespace WTF;
-static const UChar table[256] = {
+static const UChar latin1ConversionTable[256] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07
0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, // 10-17
registrar("x-cp1252", "windows-1252");
}
-static std::unique_ptr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*)
-{
- return std::make_unique<TextCodecLatin1>();
-}
-
void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar)
{
- registrar("windows-1252", newStreamingTextDecoderWindowsLatin1, 0);
+ registrar("windows-1252", [] {
+ return std::make_unique<TextCodecLatin1>();
+ });
}
String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
*destination = *source;
} else {
useLookupTable:
- if (table[*source] > 0xff)
+ if (latin1ConversionTable[*source] > 0xff)
goto upConvertTo16Bit;
- *destination = table[*source];
+ *destination = latin1ConversionTable[*source];
}
++source;
*destination16++ = *ptr8++;
// Handle the character that triggered the 16 bit path
- *destination16 = table[*source];
+ *destination16 = latin1ConversionTable[*source];
++source;
++destination16;
*destination16 = *source;
} else {
useLookupTable16:
- *destination16 = table[*source];
+ *destination16 = latin1ConversionTable[*source];
}
++source;
return result16;
}
-static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, UnencodableHandling handling)
+static Vector<uint8_t> encodeComplexWindowsLatin1(StringView string, UnencodableHandling handling)
{
- Vector<char> result(length);
- char* bytes = result.data();
+ Vector<uint8_t> result;
- size_t resultLength = 0;
- for (size_t i = 0; i < length; ) {
- UChar32 c;
- U16_NEXT(characters, i, length, c);
- unsigned char b = c;
+ for (auto character : string.codePoints()) {
+ uint8_t b = character;
// Do an efficient check to detect characters other than 00-7F and A0-FF.
- if (b != c || (c & 0xE0) == 0x80) {
+ if (b != character || (character & 0xE0) == 0x80) {
// Look for a way to encode this with Windows Latin-1.
- for (b = 0x80; b < 0xA0; ++b)
- if (table[b] == c)
+ for (b = 0x80; b < 0xA0; ++b) {
+ if (latin1ConversionTable[b] == character)
goto gotByte;
+ }
// No way to encode this character with Windows Latin-1.
UnencodableReplacementArray replacement;
- int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement);
- result.grow(resultLength + replacementLength + length - i);
- bytes = result.data();
- memcpy(bytes + resultLength, replacement, replacementLength);
- resultLength += replacementLength;
+ int replacementLength = TextCodec::getUnencodableReplacement(character, handling, replacement);
+ result.append(replacement.data(), replacementLength);
continue;
}
gotByte:
- bytes[resultLength++] = b;
+ result.append(b);
}
- return CString(bytes, resultLength);
+ return result;
}
-CString TextCodecLatin1::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+Vector<uint8_t> TextCodecLatin1::encode(StringView string, UnencodableHandling handling)
{
{
- char* bytes;
- CString string = CString::newUninitialized(length, bytes);
+ Vector<uint8_t> result(string.length());
+ auto* bytes = result.data();
- // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII.
+ // Convert and simultaneously do a check to see if it's all ASCII.
UChar ored = 0;
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- bytes[i] = c;
- ored |= c;
+ for (auto character : string.codeUnits()) {
+ *bytes++ = character;
+ ored |= character;
}
if (!(ored & 0xFF80))
- return string;
+ return result;
}
// If it wasn't all ASCII, call the function that handles more-complex cases.
- return encodeComplexWindowsLatin1(characters, length, handling);
+ return encodeComplexWindowsLatin1(string, handling);
}
} // namespace WebCore
/*
- * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextCodecLatin1_h
-#define TextCodecLatin1_h
+#pragma once
#include "TextCodec.h"
namespace WebCore {
- class TextCodecLatin1 : public TextCodec {
- public:
- static void registerEncodingNames(EncodingNameRegistrar);
- static void registerCodecs(TextCodecRegistrar);
+class TextCodecLatin1 final : public TextCodec {
+public:
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
- String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
- CString encode(const UChar*, size_t length, UnencodableHandling) override;
- };
+private:
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) final;
+ Vector<uint8_t> encode(StringView, UnencodableHandling) final;
+};
} // namespace WebCore
-#endif // TextCodecLatin1_h
/*
- * Copyright (C) 2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
namespace WebCore {
-std::unique_ptr<TextCodec> TextCodecReplacement::create(const TextEncoding&, const void*)
-{
- return std::make_unique<TextCodecReplacement>();
-}
-
-TextCodecReplacement::TextCodecReplacement()
-{
-}
-
void TextCodecReplacement::registerEncodingNames(EncodingNameRegistrar registrar)
{
- // The 'replacement' itself is not a valid label. It is the name of
+ // The string 'replacement' itself is not a valid label. It is the name of
// a group of legacy encoding labels. Hence, it cannot be used directly.
+ // The TextEncoding class implements the above rule; here we register it normally.
registrar("replacement", "replacement");
- // The labels
registrar("csiso2022kr", "replacement");
registrar("hz-gb-2312", "replacement");
registrar("iso-2022-cn", "replacement");
void TextCodecReplacement::registerCodecs(TextCodecRegistrar registrar)
{
- registrar("replacement", create, 0);
+ registrar("replacement", [] {
+ return std::make_unique<TextCodecReplacement>();
+ });
}
String TextCodecReplacement::decode(const char*, size_t, bool, bool, bool& sawError)
sawError = true;
if (m_sentEOF)
return emptyString();
-
m_sentEOF = true;
- return String(&replacementCharacter, 1);
+ return String { &replacementCharacter, 1 };
}
} // namespace WebCore
/*
- * Copyright (C) 2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextCodecReplacement_h
-#define TextCodecReplacement_h
+#pragma once
#include "TextCodecUTF8.h"
class TextCodecReplacement : public TextCodecUTF8 {
public:
- static std::unique_ptr<TextCodec> create(const TextEncoding&, const void*);
-
- TextCodecReplacement();
-
static void registerEncodingNames(EncodingNameRegistrar);
static void registerCodecs(TextCodecRegistrar);
private:
- String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) final;
bool m_sentEOF { false };
};
} // namespace WebCore
-
-#endif /* TextCodecReplacement_h */
/*
- * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
namespace WebCore {
+inline TextCodecUTF16::TextCodecUTF16(bool littleEndian)
+ : m_littleEndian(littleEndian)
+{
+}
+
void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
{
registrar("UTF-16LE", "UTF-16LE");
registrar("unicodeFFFE", "UTF-16BE");
}
-static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*)
-{
- return std::make_unique<TextCodecUTF16>(true);
-}
-
-static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*)
-{
- return std::make_unique<TextCodecUTF16>(false);
-}
-
void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
{
- registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0);
- registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0);
+ registrar("UTF-16LE", [] {
+ return std::make_unique<TextCodecUTF16>(true);
+ });
+ registrar("UTF-16BE", [] {
+ return std::make_unique<TextCodecUTF16>(false);
+ });
}
String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool&)
return String::adopt(WTFMove(buffer));
}
-CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling)
+Vector<uint8_t> TextCodecUTF16::encode(StringView string, UnencodableHandling)
{
- // We need to be sure we can double the length without overflowing.
- // Since the passed-in length is the length of an actual existing
- // character buffer, each character is two bytes, and we know
- // the buffer doesn't occupy the entire address space, we can
- // assert here that doubling the length does not overflow size_t
- // and there's no need for a runtime check.
- ASSERT(length <= std::numeric_limits<size_t>::max() / 2);
-
- char* bytes;
- CString string = CString::newUninitialized(length * 2, bytes);
-
- // FIXME: CString is not a reasonable data structure for encoded UTF-16, which will have
- // null characters inside it. Perhaps the result of encode should not be a CString.
+ Vector<uint8_t> result(WTF::checkedProduct<size_t>(string.length(), 2).unsafeGet());
+ auto* bytes = result.data();
+
if (m_littleEndian) {
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- bytes[i * 2] = c;
- bytes[i * 2 + 1] = c >> 8;
+ for (auto character : string.codeUnits()) {
+ *bytes++ = character;
+ *bytes++ = character >> 8;
}
} else {
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- bytes[i * 2] = c >> 8;
- bytes[i * 2 + 1] = c;
+ for (auto character : string.codeUnits()) {
+ *bytes++ = character >> 8;
+ *bytes++ = character;
}
}
- return string;
+ return result;
}
} // namespace WebCore
/*
- * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextCodecUTF16_h
-#define TextCodecUTF16_h
+#pragma once
#include "TextCodec.h"
namespace WebCore {
- class TextCodecUTF16 : public TextCodec {
- public:
- static void registerEncodingNames(EncodingNameRegistrar);
- static void registerCodecs(TextCodecRegistrar);
+class TextCodecUTF16 : public TextCodec {
+public:
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
- TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { }
+ explicit TextCodecUTF16(bool littleEndian);
- String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
- CString encode(const UChar*, size_t length, UnencodableHandling) override;
+private:
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) final;
+ Vector<uint8_t> encode(StringView, UnencodableHandling) final;
- private:
- bool m_littleEndian;
- bool m_haveBufferedByte;
- unsigned char m_bufferedByte;
- };
+ bool m_littleEndian;
+ bool m_haveBufferedByte { false };
+ unsigned char m_bufferedByte;
+};
} // namespace WebCore
-
-#endif // TextCodecUTF16_h
void TextCodecUTF8::registerCodecs(TextCodecRegistrar registrar)
{
- registrar("UTF-8", [] (const TextEncoding&, const void*) -> std::unique_ptr<TextCodec> {
+ registrar("UTF-8", [] {
return std::make_unique<TextCodecUTF8>();
- }, nullptr);
+ });
}
static inline int nonASCIISequenceLength(uint8_t firstByte)
return String::adopt(WTFMove(buffer16));
}
-CString TextCodecUTF8::encode(const UChar* characters, size_t length, UnencodableHandling)
+Vector<uint8_t> TextCodecUTF8::encode(StringView string, UnencodableHandling)
{
// The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3.
// BMP characters take only one UTF-16 code unit and can take up to 3 bytes (3x).
// Non-BMP characters take two UTF-16 code units and can take up to 4 bytes (2x).
- if (length > std::numeric_limits<size_t>::max() / 3)
- CRASH();
-
- Vector<char, 3000> bytes(length * 3);
+ Vector<uint8_t> bytes(WTF::checkedProduct<size_t>(string.length(), 3).unsafeGet());
size_t bytesWritten = 0;
- for (size_t i = 0; i < length; ) {
- UChar32 character;
- U16_NEXT(characters, i, length, character);
+ for (auto character : string.codePoints())
U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character);
- }
- return CString { bytes.data(), bytesWritten };
+ bytes.shrink(bytesWritten);
+ return bytes;
}
} // namespace WebCore
#pragma once
#include "TextCodec.h"
+#include <wtf/text/LChar.h>
namespace WebCore {
private:
String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
- CString encode(const UChar*, size_t length, UnencodableHandling) override;
+ Vector<uint8_t> encode(StringView, UnencodableHandling) final;
bool handlePartialSequence(LChar*& destination, const uint8_t*& source, const uint8_t* end, bool flush);
void handlePartialSequence(UChar*& destination, const uint8_t*& source, const uint8_t* end, bool flush, bool stopOnError, bool& sawError);
/*
- * Copyright (C) 2007, 2008 Apple, Inc. All rights reserved.
+ * Copyright (C) 2007-2017 Apple, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#include "config.h"
#include "TextCodecUserDefined.h"
-#include <stdio.h>
+#include <array>
#include <wtf/text/CString.h>
-#include <wtf/text/StringBuffer.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/text/WTFString.h>
registrar("x-user-defined", "x-user-defined");
}
-static std::unique_ptr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*)
-{
- return std::make_unique<TextCodecUserDefined>();
-}
-
void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar)
{
- registrar("x-user-defined", newStreamingTextDecoderUserDefined, 0);
+ registrar("x-user-defined", [] {
+ return std::make_unique<TextCodecUserDefined>();
+ });
}
String TextCodecUserDefined::decode(const char* bytes, size_t length, bool, bool, bool&)
{
StringBuilder result;
result.reserveCapacity(length);
-
for (size_t i = 0; i < length; ++i) {
signed char c = bytes[i];
result.append(static_cast<UChar>(c & 0xF7FF));
}
-
return result.toString();
}
-static CString encodeComplexUserDefined(const UChar* characters, size_t length, UnencodableHandling handling)
+static Vector<uint8_t> encodeComplexUserDefined(StringView string, UnencodableHandling handling)
{
- Vector<char> result(length);
- char* bytes = result.data();
+ Vector<uint8_t> result;
- size_t resultLength = 0;
- for (size_t i = 0; i < length; ) {
- UChar32 c;
- U16_NEXT(characters, i, length, c);
- signed char signedByte = c;
- if ((signedByte & 0xF7FF) == c)
- bytes[resultLength++] = signedByte;
+ for (auto character : string.codePoints()) {
+ int8_t signedByte = character;
+ if ((signedByte & 0xF7FF) == character)
+ result.append(signedByte);
else {
// No way to encode this character with x-user-defined.
UnencodableReplacementArray replacement;
- int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement);
- result.grow(resultLength + replacementLength + length - i);
- bytes = result.data();
- memcpy(bytes + resultLength, replacement, replacementLength);
- resultLength += replacementLength;
+ int replacementLength = TextCodec::getUnencodableReplacement(character, handling, replacement);
+ result.append(replacement.data(), replacementLength);
}
}
- return CString(bytes, resultLength);
+ return result;
}
-CString TextCodecUserDefined::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+Vector<uint8_t> TextCodecUserDefined::encode(StringView string, UnencodableHandling handling)
{
- char* bytes;
- CString string = CString::newUninitialized(length, bytes);
+ {
+ Vector<uint8_t> result(string.length());
+ auto* bytes = result.data();
+
+ // Convert and simultaneously do a check to see if it's all ASCII.
+ UChar ored = 0;
+ for (auto character : string.codeUnits()) {
+ *bytes++ = character;
+ ored |= character;
+ }
- // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII.
- UChar ored = 0;
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- bytes[i] = c;
- ored |= c;
+ if (!(ored & 0xFF80))
+ return result;
}
- if (!(ored & 0xFF80))
- return string;
-
// If it wasn't all ASCII, call the function that handles more-complex cases.
- return encodeComplexUserDefined(characters, length, handling);
+ return encodeComplexUserDefined(string, handling);
}
} // namespace WebCore
/*
- * Copyright (C) 2007 Apple, Inc. All rights reserved.
+ * Copyright (C) 2007-2017 Apple, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextCodecUserDefined_h
-#define TextCodecUserDefined_h
+#pragma once
#include "TextCodec.h"
namespace WebCore {
- class TextCodecUserDefined : public TextCodec {
- public:
- static void registerEncodingNames(EncodingNameRegistrar);
- static void registerCodecs(TextCodecRegistrar);
+class TextCodecUserDefined : public TextCodec {
+public:
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
- String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
- CString encode(const UChar*, size_t length, UnencodableHandling) override;
- };
+private:
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) final;
+ Vector<uint8_t> encode(StringView, UnencodableHandling) final;
+};
} // namespace WebCore
-
-#endif // TextCodecUserDefined_h
/*
- * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
* Copyright (C) 2007-2009 Torch Mobile, Inc.
*
, m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
{
// Aliases are valid, but not "replacement" itself.
- if (m_name && isReplacementEncoding(name))
+ if (equalLettersIgnoringASCIICase(name, "replacement"))
m_name = nullptr;
}
, m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
{
// Aliases are valid, but not "replacement" itself.
- if (m_name && isReplacementEncoding(name))
+ if (equalLettersIgnoringASCIICase(name, "replacement"))
m_name = nullptr;
}
return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);
}
-CString TextEncoding::encode(StringView text, UnencodableHandling handling) const
+Vector<uint8_t> TextEncoding::encode(StringView text, UnencodableHandling handling) const
{
- if (!m_name)
- return CString();
+ if (!m_name || text.isEmpty())
+ return { };
- if (text.isEmpty())
- return "";
+ // FIXME: Consider adding a fast case for ASCII.
// FIXME: What's the right place to do normalization?
// It's a little strange to do it inside the encode function.
auto upconvertedCharacters = text.upconvertedCharacters();
const UChar* source = upconvertedCharacters;
- size_t sourceLength = text.length();
+ unsigned sourceLength = text.length();
Vector<UChar> normalizedCharacters;
sourceLength = normalizedLength;
}
- return newTextCodec(*this)->encode(source, sourceLength, handling);
+ return newTextCodec(*this)->encode(StringView { source, sourceLength }, handling);
}
const char* TextEncoding::domName() const
/*
- * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#pragma once
-#include "TextCodec.h"
-#include <wtf/Forward.h>
+#include <pal/text/UnencodableHandling.h>
+#include <wtf/text/WTFString.h>
namespace WebCore {
WEBCORE_EXPORT String decode(const char*, size_t length, bool stopOnError, bool& sawError) const;
String decode(const char*, size_t length) const;
- CString encode(StringView, UnencodableHandling) const;
+ Vector<uint8_t> encode(StringView, UnencodableHandling) const;
UChar backslashAsCurrencySymbol() const;
bool isByteBasedEncoding() const { return !isNonByteBasedEncoding(); }
/*
- * Copyright (C) 2006, 2007, 2011 Apple Inc. All rights reserved.
+ * Copyright (C) 2006-2017 Apple Inc. All rights reserved.
* Copyright (C) 2007-2009 Torch Mobile, Inc.
*
* Redistribution and use in source and binary forms, with or without
#include "TextCodecICU.h"
#include "TextCodecLatin1.h"
#include "TextCodecReplacement.h"
-#include "TextCodecUserDefined.h"
#include "TextCodecUTF16.h"
#include "TextCodecUTF8.h"
+#include "TextCodecUserDefined.h"
#include "TextEncoding.h"
#include <mutex>
#include <wtf/ASCIICType.h>
+#include <wtf/CheckedArithmetic.h>
+#include <wtf/CurrentTime.h>
#include <wtf/HashMap.h>
#include <wtf/HashSet.h>
#include <wtf/Lock.h>
#include <wtf/MainThread.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/StringExtras.h>
-
-#include <wtf/CurrentTime.h>
#include <wtf/text/CString.h>
-
namespace WebCore {
using namespace WTF;
static const bool safeToCompareToEmptyOrDeleted = false;
};
-struct TextCodecFactory {
- NewTextCodecFunction function;
- const void* additionalData;
- TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { }
-};
-
-typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
-typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
+using TextEncodingNameMap = HashMap<const char*, const char*, TextEncodingNameHash>;
+using TextCodecMap = HashMap<const char*, NewTextCodecFunction>;
static StaticLock encodingRegistryMutex;
textEncodingNameMap->add(alias, atomicName);
}
-static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
+static void addToTextCodecMap(const char* name, NewTextCodecFunction&& function)
{
const char* atomicName = textEncodingNameMap->get(name);
ASSERT(atomicName);
- textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
+ textCodecMap->add(atomicName, WTFMove(function));
}
static void pruneBlacklistedCodecs()
return canonicalEncodingName && japaneseEncodings && japaneseEncodings->contains(canonicalEncodingName);
}
-bool isReplacementEncoding(const char* alias)
-{
- if (!alias)
- return false;
-
- if (strlen(alias) != 11)
- return false;
-
- return !strcasecmp(alias, "replacement");
-}
-
-bool isReplacementEncoding(const String& alias)
-{
- return equalLettersIgnoringASCIICase(alias, "replacement");
-}
-
bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName)
{
return canonicalEncodingName && nonBackslashEncodings && nonBackslashEncodings->contains(canonicalEncodingName);
std::lock_guard<StaticLock> lock(encodingRegistryMutex);
ASSERT(textCodecMap);
- TextCodecFactory factory = textCodecMap->get(encoding.name());
- ASSERT(factory.function);
- return factory.function(encoding, factory.additionalData);
+ auto result = textCodecMap->find(encoding.name());
+ ASSERT(result != textCodecMap->end());
+ return result->value();
}
const char* atomicCanonicalTextEncodingName(const char* name)
return textEncodingNameMap->get(name);
}
-template <typename CharacterType>
-const char* atomicCanonicalTextEncodingName(const CharacterType* characters, size_t length)
+template<typename CharacterType> static const char* atomicCanonicalTextEncodingName(const CharacterType* characters, size_t length)
{
char buffer[maxEncodingNameLength + 1];
size_t j = 0;
for (size_t i = 0; i < length; ++i) {
- CharacterType c = characters[i];
if (j == maxEncodingNameLength)
- return 0;
- buffer[j++] = c;
+ return nullptr;
+ buffer[j++] = characters[i];
}
buffer[j] = 0;
return atomicCanonicalTextEncodingName(buffer);
const char* atomicCanonicalTextEncodingName(const String& alias)
{
- if (!alias.length())
+ if (alias.isEmpty() || !alias.isAllASCII())
return nullptr;
if (alias.is8Bit())
#endif
}
-#ifndef NDEBUG
-void dumpTextEncodingNameMap()
-{
- unsigned size = textEncodingNameMap->size();
- fprintf(stderr, "Dumping %u entries in WebCore::textEncodingNameMap...\n", size);
-
- std::lock_guard<StaticLock> lock(encodingRegistryMutex);
-
- TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
- TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
- for (; it != end; ++it)
- fprintf(stderr, "'%s' => '%s'\n", it->key, it->value);
-}
-#endif
-
} // namespace WebCore
#pragma once
#include <memory>
-#include <wtf/text/WTFString.h>
+#include <wtf/Forward.h>
namespace WebCore {
// Only TextEncoding should use the following functions directly.
const char* atomicCanonicalTextEncodingName(const char* alias);
-template <typename CharacterType>
-const char* atomicCanonicalTextEncodingName(const CharacterType*, size_t);
const char* atomicCanonicalTextEncodingName(const String&);
bool noExtendedTextEncodingNameUsed();
bool isJapaneseEncoding(const char* canonicalEncodingName);
bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName);
-bool isReplacementEncoding(const char* alias);
-bool isReplacementEncoding(const String& alias);
WEBCORE_EXPORT String defaultTextEncodingNameForSystemLanguage();
+
#if PLATFORM(COCOA)
WEBCORE_EXPORT CFStringEncoding webDefaultCFStringEncoding();
#endif
-#ifndef NDEBUG
-void dumpTextEncodingNameMap();
-#endif
-
} // namespace WebCore
int resultLength = WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, 0, 0, 0, 0);
- // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables.
+ // FIXME: We need to implement UnencodableHandling.
if (resultLength <= 0)
return "?";
// FIXME: According to XMLHttpRequest Level 2, this should use the Document.innerHTML algorithm
// from the HTML5 specification to serialize the document.
- m_requestEntityBody = FormData::create(UTF8Encoding().encode(createMarkup(document), EntitiesForUnencodables));
+ m_requestEntityBody = FormData::create(UTF8Encoding().encode(createMarkup(document), UnencodableHandling::Entities));
if (m_upload)
m_requestEntityBody->setAlwaysStream(true);
}
m_requestHeaders.set(HTTPHeaderName::ContentType, contentType);
}
- m_requestEntityBody = FormData::create(UTF8Encoding().encode(body, EntitiesForUnencodables));
+ m_requestEntityBody = FormData::create(UTF8Encoding().encode(body, UnencodableHandling::Entities));
if (m_upload)
m_requestEntityBody->setAlwaysStream(true);
}
#include <libxslt/templates.h>
#include <libxslt/xsltutils.h>
-#include <wtf/StringExtras.h>
#include <wtf/Vector.h>
#include <wtf/unicode/Collator.h>
#include "TextResourceDecoder.h"
#include "TreeDepthLimit.h"
#include <wtf/Ref.h>
-#include <wtf/StringExtras.h>
#include <wtf/Threading.h>
#include <wtf/Vector.h>
#include "XMLNSNames.h"
#include "XMLDocumentParserScope.h"
#include <libxml/parserInternals.h>
-#include <wtf/StringExtras.h>
#include <wtf/unicode/UTF8.h>
#if ENABLE(XSLT)
+2017-12-02 Darin Adler <darin@apple.com>
+
+ Modernize some aspects of text codecs, eliminate WebKit use of strcasecmp
+ https://bugs.webkit.org/show_bug.cgi?id=180009
+
+ Reviewed by Alex Christensen.
+
+ * WebProcess/Plugins/Netscape/NetscapeBrowserFuncs.cpp:
+ (WebKit::initializeProtectionSpace): Use equalLettersIgnoringASCIICase instead
+ of strcasecmp. This also makes the "digest" scheme check case-insensitive; it
+ previously used strcmp.
+
2017-12-06 David Quesada <david_quesada@apple.com>
[Web App Manifest] Add SPI for applying a manifest to a top-level browsing context
static bool initializeProtectionSpace(const char* protocol, const char* host, int port, const char* scheme, const char* realm, ProtectionSpace& protectionSpace)
{
ProtectionSpaceServerType serverType;
- if (!strcasecmp(protocol, "http"))
+ if (equalLettersIgnoringASCIICase(protocol, "http"))
serverType = ProtectionSpaceServerHTTP;
- else if (!strcasecmp(protocol, "https"))
+ else if (equalLettersIgnoringASCIICase(protocol, "https"))
serverType = ProtectionSpaceServerHTTPS;
else {
// We only care about http and https.
ProtectionSpaceAuthenticationScheme authenticationScheme = ProtectionSpaceAuthenticationSchemeDefault;
if (serverType == ProtectionSpaceServerHTTP) {
- if (!strcasecmp(scheme, "basic"))
+ if (equalLettersIgnoringASCIICase(scheme, "basic"))
authenticationScheme = ProtectionSpaceAuthenticationSchemeHTTPBasic;
- else if (!strcmp(scheme, "digest"))
+ else if (equalLettersIgnoringASCIICase(scheme, "digest"))
authenticationScheme = ProtectionSpaceAuthenticationSchemeHTTPDigest;
}
+2017-12-02 Darin Adler <darin@apple.com>
+
+ Modernize some aspects of text codecs, eliminate WebKit use of strcasecmp
+ https://bugs.webkit.org/show_bug.cgi?id=180009
+
+ Reviewed by Alex Christensen.
+
+ * History/HistoryPropertyList.mm: Removed include of StringExtras.h.
+
+ * Plugins/WebBaseNetscapePluginView.mm:
+ (WebKit::getAuthenticationInfo): Use equalLettersIgnoringASCIICase instead
+ of strcasecmp.
+
+ * WebView/WebPreferences.mm:
+ (contains): Use equalIgnoringASCIICase instead of strcasecmp. Also made this a
+ template so it's easier to call on an array.
+ (cacheModelForMainBundle): Take advantage of the above to make it cleaner.
+
2017-12-05 Brent Fulgham <bfulgham@apple.com>
Limit user agent versioning to an upper bound
#import "WebHistoryItemInternal.h"
#import <WebCore/HistoryItem.h>
-#import <wtf/StringExtras.h>
using namespace WebCore;
namespace WebKit {
-bool getAuthenticationInfo(const char* protocolStr, const char* hostStr, int32_t port, const char* schemeStr, const char* realmStr,
- CString& username, CString& password)
+bool getAuthenticationInfo(const char* protocolStr, const char* hostStr, int32_t port, const char* schemeStr, const char* realmStr, CString& username, CString& password)
{
- if (strcasecmp(protocolStr, "http") != 0 &&
- strcasecmp(protocolStr, "https") != 0)
+ if (!equalLettersIgnoringASCIICase(protocolStr, "http") && !equalLettersIgnoringASCIICase(protocolStr, "https"))
return false;
NSString *host = [NSString stringWithUTF8String:hostStr];
return NPERR_GENERIC_ERROR;
NSString *authenticationMethod = NSURLAuthenticationMethodDefault;
- if (!strcasecmp(protocolStr, "http")) {
- if (!strcasecmp(schemeStr, "basic"))
+ if (equalLettersIgnoringASCIICase(protocolStr, "http")) {
+ if (equalLettersIgnoringASCIICase(schemeStr, "basic"))
authenticationMethod = NSURLAuthenticationMethodHTTPBasic;
- else if (!strcasecmp(schemeStr, "digest"))
+ else if (equalLettersIgnoringASCIICase(schemeStr, "digest"))
authenticationMethod = NSURLAuthenticationMethodHTTPDigest;
}
static unsigned webPreferencesInstanceCountWithPrivateBrowsingEnabled;
-static bool contains(const char* const array[], int count, const char* item)
+template<unsigned size> static bool contains(const char* const (&array)[size], const char* item)
{
if (!item)
return false;
-
- for (int i = 0; i < count; i++)
- if (!strcasecmp(array[i], item))
+ for (auto* string : array) {
+ if (equalIgnoringASCIICase(string, item))
return true;
+ }
return false;
}
};
const char* bundleID = [[[NSBundle mainBundle] bundleIdentifier] UTF8String];
- if (contains(documentViewerIDs, sizeof(documentViewerIDs) / sizeof(documentViewerIDs[0]), bundleID))
+ if (contains(documentViewerIDs, bundleID))
return WebCacheModelDocumentViewer;
- if (contains(documentBrowserIDs, sizeof(documentBrowserIDs) / sizeof(documentBrowserIDs[0]), bundleID))
+ if (contains(documentBrowserIDs, bundleID))
return WebCacheModelDocumentBrowser;
- if (contains(primaryWebBrowserIDs, sizeof(primaryWebBrowserIDs) / sizeof(primaryWebBrowserIDs[0]), bundleID))
+ if (contains(primaryWebBrowserIDs, bundleID))
return WebCacheModelPrimaryWebBrowser;
bool isLinkedAgainstWebKit = WebKitLinkedOnOrAfter(0);
+2017-12-02 Darin Adler <darin@apple.com>
+
+ Modernize some aspects of text codecs, eliminate WebKit use of strcasecmp
+ https://bugs.webkit.org/show_bug.cgi?id=180009
+
+ Reviewed by Alex Christensen.
+
+ * DumpRenderTree/cg/PixelDumpSupportCG.cpp: Removed include of StringExtras.h.
+
+ * TestWebKitAPI/CMakeLists.txt: Added LineEnding.cpp.
+ * TestWebKitAPI/TestWebKitAPI.xcodeproj/project.pbxproj: Ditto.
+ * TestWebKitAPI/Tests/WTF/LineEnding.cpp: Added. Has tests for the LineEnding
+ functions rewritten in this patch.
+
+ * TestWebKitAPI/Tests/WebCore/TextCodec.cpp: Updated test now that the decode
+ function no longer has defaults for as many arguments. Also changed the tests
+ so that they log when the decode function reports an error, and updated the
+ expected results to expect all the errors.
+
+ * WebKitTestRunner/cg/TestInvocationCG.cpp: Ditto.
+
2017-12-06 Alexey Proskuryakov <ap@apple.com>
Exclude another leak callstack.
#include <wtf/Assertions.h>
#include <wtf/RefPtr.h>
#include <wtf/RetainPtr.h>
-#include <wtf/StringExtras.h>
#if PLATFORM(WIN)
#include "MD5.h"
${TESTWEBKITAPI_DIR}/Tests/WTF/JSONValue.cpp
${TESTWEBKITAPI_DIR}/Tests/WTF/LEBDecoder.cpp
${TESTWEBKITAPI_DIR}/Tests/WTF/LifecycleLogger.cpp
+ ${TESTWEBKITAPI_DIR}/Tests/WTF/LineEnding.cpp
${TESTWEBKITAPI_DIR}/Tests/WTF/ListHashSet.cpp
${TESTWEBKITAPI_DIR}/Tests/WTF/Lock.cpp
${TESTWEBKITAPI_DIR}/Tests/WTF/Logger.cpp
93AF4ECE1506F064007FD57E /* NewFirstVisuallyNonEmptyLayoutForImages_Bundle.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93AF4ECD1506F064007FD57E /* NewFirstVisuallyNonEmptyLayoutForImages_Bundle.cpp */; };
93AF4ED11506F130007FD57E /* lots-of-images.html in Copy Resources */ = {isa = PBXBuildFile; fileRef = 93AF4ECF1506F123007FD57E /* lots-of-images.html */; };
93CFA8671CEB9E38000565A8 /* autofocused-text-input.html in Copy Resources */ = {isa = PBXBuildFile; fileRef = 93CFA8661CEB9DE1000565A8 /* autofocused-text-input.html */; };
+ 93E2C5551FD3204100E1DF6A /* LineEnding.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93E2C5541FD3204100E1DF6A /* LineEnding.cpp */; };
93E2D2761ED7D53200FA76F6 /* offscreen-iframe-of-media-document.html in Copy Resources */ = {isa = PBXBuildFile; fileRef = 93E2D2751ED7D51700FA76F6 /* offscreen-iframe-of-media-document.html */; };
93E6193B1F931B3A00AF245E /* TextCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93A258981F92FF15003E510C /* TextCodec.cpp */; };
93F1DB3414DA20870024C362 /* NewFirstVisuallyNonEmptyLayout_Bundle.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 93F1DB3314DA20870024C362 /* NewFirstVisuallyNonEmptyLayout_Bundle.cpp */; };
93CFA8681CEBCFED000565A8 /* CandidateTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CandidateTests.mm; sourceTree = "<group>"; };
93D3D19B17B1A7B000C7C415 /* all-content-in-one-iframe.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = "all-content-in-one-iframe.html"; sourceTree = "<group>"; };
93D3D19D17B1A84200C7C415 /* LayoutMilestonesWithAllContentInFrame.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LayoutMilestonesWithAllContentInFrame.cpp; sourceTree = "<group>"; };
+ 93E2C5541FD3204100E1DF6A /* LineEnding.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LineEnding.cpp; sourceTree = "<group>"; };
93E2D2751ED7D51700FA76F6 /* offscreen-iframe-of-media-document.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = "offscreen-iframe-of-media-document.html"; sourceTree = "<group>"; };
93E943F11CD3E87E00AC08C2 /* VideoControlsManager.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = VideoControlsManager.mm; sourceTree = "<group>"; };
93F1DB3014DA20760024C362 /* NewFirstVisuallyNonEmptyLayout.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = NewFirstVisuallyNonEmptyLayout.cpp; sourceTree = "<group>"; };
531C1D8D1DF8EF72006E979F /* LEBDecoder.cpp */,
A57D54F71F3397B400A97AA7 /* LifecycleLogger.cpp */,
A57D54F81F3397B400A97AA7 /* LifecycleLogger.h */,
+ 93E2C5541FD3204100E1DF6A /* LineEnding.cpp */,
26300B1716755CD90066886D /* ListHashSet.cpp */,
0FFC45A41B73EBE20085BD62 /* Lock.cpp */,
A57D54F41F3395D000A97AA7 /* Logger.cpp */,
isa = PBXGroup;
children = (
7560917619259C59009EF06E /* ios */,
- FE217ECB1640A54A0052988B /* JavaScriptCore */,
C07E6CAD13FD67650038B22B /* mac */,
C08587F913FEC39B001EF4E5 /* TestWebKitAPI */,
440A1D3614A01000008A66F2 /* WebCore */,
name = Resources;
sourceTree = "<group>";
};
- FE217ECB1640A54A0052988B /* JavaScriptCore */ = {
- isa = PBXGroup;
- children = (
- );
- path = JavaScriptCore;
- sourceTree = "<group>";
- };
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
7A0509411FB9F06400B33FB8 /* JSONValue.cpp in Sources */,
531C1D8E1DF8EF72006E979F /* LEBDecoder.cpp in Sources */,
A57D54F91F3397B400A97AA7 /* LifecycleLogger.cpp in Sources */,
+ 93E2C5551FD3204100E1DF6A /* LineEnding.cpp in Sources */,
7C83DEE81D0A590C00FEBCF3 /* ListHashSet.cpp in Sources */,
7C83DF1D1D0A590C00FEBCF3 /* Lock.cpp in Sources */,
A57D54F61F3395D000A97AA7 /* Logger.cpp in Sources */,
--- /dev/null
+/*
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <wtf/Vector.h>
+#include <wtf/text/LineEnding.h>
+
+namespace TestWebKitAPI {
+
+// Named byte values used to spell out the input vectors passed to the
+// line-ending normalization functions in the tests in this file.
+const uint8_t null = 0;
+const uint8_t CR = '\r';
+const uint8_t LF = '\n';
+const uint8_t letterA = 'a';
+const uint8_t letterB = 'b';
+
+// Renders a byte vector as a printable C string for use with EXPECT_STREQ:
+// NUL becomes "<0>", CR becomes "<CR>", LF becomes "<LF>", and every other
+// byte is copied through unchanged.
+//
+// The result lives in a function-local static buffer, so it is overwritten by
+// the next call and must be consumed before stringify is called again. Output
+// that would not fit in the buffer is truncated rather than written past its
+// end; a truncated result simply fails the string comparison in the caller.
+static const char* stringify(const Vector<uint8_t>& vector)
+{
+    static char buffer[100];
+    char* out = buffer;
+    // Keep the final slot free for the terminating NUL.
+    char* const limit = buffer + sizeof(buffer) - 1;
+
+    // Copies the token only if it fits completely; returns false once the
+    // buffer cannot hold it, so a multi-character marker is never split.
+    auto append = [&out, limit](const char* token, size_t length) {
+        if (static_cast<size_t>(limit - out) < length)
+            return false;
+        for (size_t i = 0; i < length; ++i)
+            *out++ = token[i];
+        return true;
+    };
+
+    for (auto character : vector) {
+        bool appended = false;
+        switch (character) {
+        case '\0':
+            appended = append("<0>", 3);
+            break;
+        case '\r':
+            appended = append("<CR>", 4);
+            break;
+        case '\n':
+            appended = append("<LF>", 4);
+            break;
+        default: {
+            const char literal = static_cast<char>(character);
+            appended = append(&literal, 1);
+            break;
+        }
+        }
+        if (!appended)
+            break;
+    }
+    *out = '\0';
+    return buffer;
+}
+
+// Checks normalizeLineEndingsToLF against short byte sequences. The
+// expectations encode that NUL and letters pass through unchanged, that a lone
+// CR, a lone LF, and a CR LF pair each become a single LF, and that LF followed
+// by CR counts as two separate line endings.
+TEST(WTF, LineEndingNormalizeToLF)
+{
+ // Empty input.
+ EXPECT_STREQ("", stringify(normalizeLineEndingsToLF({ })));
+
+ // Single bytes.
+ EXPECT_STREQ("<0>", stringify(normalizeLineEndingsToLF({ null })));
+ EXPECT_STREQ("a", stringify(normalizeLineEndingsToLF({ letterA })));
+ EXPECT_STREQ("<LF>", stringify(normalizeLineEndingsToLF({ CR })));
+ EXPECT_STREQ("<LF>", stringify(normalizeLineEndingsToLF({ LF })));
+
+ // All two-byte combinations of CR and LF.
+ EXPECT_STREQ("<LF>", stringify(normalizeLineEndingsToLF({ CR, LF })));
+ EXPECT_STREQ("<LF><LF>", stringify(normalizeLineEndingsToLF({ LF, LF })));
+ EXPECT_STREQ("<LF><LF>", stringify(normalizeLineEndingsToLF({ CR, CR })));
+ EXPECT_STREQ("<LF><LF>", stringify(normalizeLineEndingsToLF({ LF, CR })));
+
+ // Line endings at the end of the input, after a letter.
+ EXPECT_STREQ("a<LF>", stringify(normalizeLineEndingsToLF({ letterA, CR })));
+ EXPECT_STREQ("a<LF>", stringify(normalizeLineEndingsToLF({ letterA, LF })));
+ EXPECT_STREQ("a<LF>", stringify(normalizeLineEndingsToLF({ letterA, CR, LF })));
+ EXPECT_STREQ("a<LF><LF>", stringify(normalizeLineEndingsToLF({ letterA, LF, CR })));
+
+ // Line endings between two letters.
+ EXPECT_STREQ("a<LF>b", stringify(normalizeLineEndingsToLF({ letterA, CR, letterB })));
+ EXPECT_STREQ("a<LF>b", stringify(normalizeLineEndingsToLF({ letterA, LF, letterB })));
+ EXPECT_STREQ("a<LF>b", stringify(normalizeLineEndingsToLF({ letterA, CR, LF, letterB })));
+ EXPECT_STREQ("a<LF><LF>b", stringify(normalizeLineEndingsToLF({ letterA, LF, CR, letterB })));
+}
+
+// Checks normalizeLineEndingsToCRLF against the same byte sequences as the LF
+// test. The expectations encode that NUL and letters pass through unchanged,
+// that a lone CR, a lone LF, and a CR LF pair each become one CR LF, and that
+// LF followed by CR counts as two separate line endings.
+TEST(WTF, LineEndingNormalizeToCRLF)
+{
+ // Empty input.
+ EXPECT_STREQ("", stringify(normalizeLineEndingsToCRLF({ })));
+
+ // Single bytes.
+ EXPECT_STREQ("<0>", stringify(normalizeLineEndingsToCRLF({ null })));
+ EXPECT_STREQ("a", stringify(normalizeLineEndingsToCRLF({ letterA })));
+ EXPECT_STREQ("<CR><LF>", stringify(normalizeLineEndingsToCRLF({ CR })));
+ EXPECT_STREQ("<CR><LF>", stringify(normalizeLineEndingsToCRLF({ LF })));
+
+ // All two-byte combinations of CR and LF.
+ EXPECT_STREQ("<CR><LF>", stringify(normalizeLineEndingsToCRLF({ CR, LF })));
+ EXPECT_STREQ("<CR><LF><CR><LF>", stringify(normalizeLineEndingsToCRLF({ LF, LF })));
+ EXPECT_STREQ("<CR><LF><CR><LF>", stringify(normalizeLineEndingsToCRLF({ CR, CR })));
+ EXPECT_STREQ("<CR><LF><CR><LF>", stringify(normalizeLineEndingsToCRLF({ LF, CR })));
+
+ // Line endings at the end of the input, after a letter.
+ EXPECT_STREQ("a<CR><LF>", stringify(normalizeLineEndingsToCRLF({ letterA, CR })));
+ EXPECT_STREQ("a<CR><LF>", stringify(normalizeLineEndingsToCRLF({ letterA, LF })));
+ EXPECT_STREQ("a<CR><LF>", stringify(normalizeLineEndingsToCRLF({ letterA, CR, LF })));
+ EXPECT_STREQ("a<CR><LF><CR><LF>", stringify(normalizeLineEndingsToCRLF({ letterA, LF, CR })));
+
+ // Line endings between two letters.
+ EXPECT_STREQ("a<CR><LF>b", stringify(normalizeLineEndingsToCRLF({ letterA, CR, letterB })));
+ EXPECT_STREQ("a<CR><LF>b", stringify(normalizeLineEndingsToCRLF({ letterA, LF, letterB })));
+ EXPECT_STREQ("a<CR><LF>b", stringify(normalizeLineEndingsToCRLF({ letterA, CR, LF, letterB })));
+ EXPECT_STREQ("a<CR><LF><CR><LF>b", stringify(normalizeLineEndingsToCRLF({ letterA, LF, CR, letterB })));
+}
+
+} // namespace TestWebKitAPI
#include "config.h"
+#include <WebCore/TextCodec.h>
#include <WebCore/TextEncoding.h>
#include <WebCore/TextEncodingRegistry.h>
#include <wtf/text/StringBuilder.h>
for (size_t i = 0; i < size; ++i) {
auto vector = decodeHexTestBytes(inputs.begin()[i]);
bool last = i == size - 1;
- resultBuilder.append(escapeNonASCIIPrintableCharacters(codec->decode(vector.data(), vector.size(), last)));
+ bool sawError = false;
+ resultBuilder.append(escapeNonASCIIPrintableCharacters(codec->decode(vector.data(), vector.size(), last, false, sawError)));
+ if (sawError)
+ resultBuilder.appendLiteral(" ERROR");
}
return escapeNonASCIIPrintableCharacters(resultBuilder.toString());
}
TEST(TextCodec, UTF8InvalidSequences)
{
- EXPECT_STREQ("{FFFD}?", testDecode("UTF-8", { "E0 A5 3F" }));
- EXPECT_STREQ("{FFFD}?", testDecode("UTF-8", { "E0 A5", "3F" }));
- EXPECT_STREQ("{FFFD}?", testDecode("UTF-8", { "E0", "A5 3F" }));
- EXPECT_STREQ("{FFFD}?", testDecode("UTF-8", { "E0", "A5", "3F" }));
- EXPECT_STREQ("{FFFD}?", testDecode("UTF-8", { "E0", "", "A5", "", "3F" }));
- EXPECT_STREQ("{FFFD}?", testDecode("UTF-8", { "E0", "", "A5", "", "3F", "" }));
- EXPECT_STREQ("{FFFD}?", testDecode("UTF-8", { "", "E0", "", "A5", "", "3F", "" }));
-
- EXPECT_STREQ("a{FFFD}?", testDecode("UTF-8", { "61 E0 A5 3F" }));
- EXPECT_STREQ("a{FFFD}?", testDecode("UTF-8", { "61 E0 A5", "3F" }));
- EXPECT_STREQ("a{FFFD}?", testDecode("UTF-8", { "61 E0", "A5 3F" }));
- EXPECT_STREQ("a{FFFD}?", testDecode("UTF-8", { "61 E0", "A5", "3F" }));
-
- EXPECT_STREQ("{B6}{FFFD}?", testDecode("UTF-8", { "C2 B6 E0 A5 3F" }));
- EXPECT_STREQ("{B6}{FFFD}?", testDecode("UTF-8", { "C2 B6 E0 A5", "3F" }));
- EXPECT_STREQ("{B6}{FFFD}?", testDecode("UTF-8", { "C2 B6 E0", "A5 3F" }));
- EXPECT_STREQ("{B6}{FFFD}?", testDecode("UTF-8", { "C2 B6 E0", "A5", "3F" }));
-
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "C2" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "E2" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "E2 98" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "F0" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "F0 9F" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "F0 9F 92" }));
-
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "E2", "98" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "F0", "9F" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "F0 9F", "92" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "F0", "9F92" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "F0", "9F", "92" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}", testDecode("UTF-8", { "C0 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "E0 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 80 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 80 80 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 80 80 80 80" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}", testDecode("UTF-8", { "C1 BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "E0 81 BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 80 81 BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 80 80 81 BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 80 80 81 BF" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "E0 82 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 80 82 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 80 80 82 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 80 80 82 80" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "E0 9F BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 80 9F BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 80 80 9F BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 80 80 9F BF" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 80 A0 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 80 80 A0 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 80 80 A0 80" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 8F BF BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 80 8F BF BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 80 8F BF BF" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 80 90 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 80 90 80 80" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 84 8F BF BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 84 8F BF BF" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F4 90 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FB BF BF BF BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FD BF BF BF BF BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "ED A0 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "ED BF BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "ED A0 BD ED B2 A9" }));
-
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F8 84 90 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FC 80 84 90 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 8D A0 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 8D BF BF" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "F0 8D A0 BD F0 8D B2 A9" }));
-
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "80" }));
- EXPECT_STREQ("{FFFD}{FFFD}", testDecode("UTF-8", { "80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "80 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "80 80 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "80 80 80 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "80 80 80 80 80 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "80 80 80 80 80 80 80" }));
- EXPECT_STREQ("{B6}{FFFD}", testDecode("UTF-8", { "C2 B6 80" }));
- EXPECT_STREQ("{2603}{FFFD}", testDecode("UTF-8", { "E2 98 83 80" }));
- EXPECT_STREQ("{1F4A9}{FFFD}", testDecode("UTF-8", { "F0 9F 92 A9 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FB BF BF BF BF 80" }));
- EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}", testDecode("UTF-8", { "FD BF BF BF BF BF 80" }));
-
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "FE" }));
- EXPECT_STREQ("{FFFD}{FFFD}", testDecode("UTF-8", { "FE 80" }));
- EXPECT_STREQ("{FFFD}", testDecode("UTF-8", { "FF" }));
- EXPECT_STREQ("{FFFD}{FFFD}", testDecode("UTF-8", { "FF 80" }));
+ EXPECT_STREQ("{FFFD}? ERROR", testDecode("UTF-8", { "E0 A5 3F" }));
+ EXPECT_STREQ("{FFFD}? ERROR", testDecode("UTF-8", { "E0 A5", "3F" }));
+ EXPECT_STREQ("{FFFD}? ERROR", testDecode("UTF-8", { "E0", "A5 3F" }));
+ EXPECT_STREQ("{FFFD}? ERROR", testDecode("UTF-8", { "E0", "A5", "3F" }));
+ EXPECT_STREQ("{FFFD}? ERROR", testDecode("UTF-8", { "E0", "", "A5", "", "3F" }));
+ EXPECT_STREQ("{FFFD}? ERROR", testDecode("UTF-8", { "E0", "", "A5", "", "3F", "" }));
+ EXPECT_STREQ("{FFFD}? ERROR", testDecode("UTF-8", { "", "E0", "", "A5", "", "3F", "" }));
+
+ EXPECT_STREQ("a{FFFD}? ERROR", testDecode("UTF-8", { "61 E0 A5 3F" }));
+ EXPECT_STREQ("a{FFFD}? ERROR", testDecode("UTF-8", { "61 E0 A5", "3F" }));
+ EXPECT_STREQ("a{FFFD}? ERROR", testDecode("UTF-8", { "61 E0", "A5 3F" }));
+ EXPECT_STREQ("a{FFFD}? ERROR", testDecode("UTF-8", { "61 E0", "A5", "3F" }));
+
+ EXPECT_STREQ("{B6}{FFFD}? ERROR", testDecode("UTF-8", { "C2 B6 E0 A5 3F" }));
+ EXPECT_STREQ("{B6}{FFFD}? ERROR", testDecode("UTF-8", { "C2 B6 E0 A5", "3F" }));
+ EXPECT_STREQ("{B6}{FFFD}? ERROR", testDecode("UTF-8", { "C2 B6 E0", "A5 3F" }));
+ EXPECT_STREQ("{B6}{FFFD}? ERROR", testDecode("UTF-8", { "C2 B6 E0", "A5", "3F" }));
+
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "C2" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "E2" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "E2 98" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "F0" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "F0 9F" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "F0 9F 92" }));
+
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "E2", "98" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "F0", "9F" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "F0 9F", "92" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "F0", "9F92" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "F0", "9F", "92" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "C0 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "E0 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 80 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 80 80 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 80 80 80 80" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "C1 BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "E0 81 BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 80 81 BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 80 80 81 BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 80 80 81 BF" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "E0 82 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 80 82 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 80 80 82 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 80 80 82 80" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "E0 9F BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 80 9F BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 80 80 9F BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 80 80 9F BF" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 80 A0 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 80 80 A0 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 80 80 A0 80" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 8F BF BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 80 8F BF BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 80 8F BF BF" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 80 90 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 80 90 80 80" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 84 8F BF BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 84 8F BF BF" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F4 90 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FB BF BF BF BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FD BF BF BF BF BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "ED A0 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "ED BF BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "ED A0 BD ED B2 A9" }));
+
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F8 84 90 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FC 80 84 90 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 8D A0 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 8D BF BF" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "F0 8D A0 BD F0 8D B2 A9" }));
+
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "80" }));
+ EXPECT_STREQ("{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "80 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "80 80 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "80 80 80 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "80 80 80 80 80 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "80 80 80 80 80 80 80" }));
+ EXPECT_STREQ("{B6}{FFFD} ERROR", testDecode("UTF-8", { "C2 B6 80" }));
+ EXPECT_STREQ("{2603}{FFFD} ERROR", testDecode("UTF-8", { "E2 98 83 80" }));
+ EXPECT_STREQ("{1F4A9}{FFFD} ERROR", testDecode("UTF-8", { "F0 9F 92 A9 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FB BF BF BF BF 80" }));
+ EXPECT_STREQ("{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FD BF BF BF BF BF 80" }));
+
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "FE" }));
+ EXPECT_STREQ("{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FE 80" }));
+ EXPECT_STREQ("{FFFD} ERROR", testDecode("UTF-8", { "FF" }));
+ EXPECT_STREQ("{FFFD}{FFFD} ERROR", testDecode("UTF-8", { "FF 80" }));
}
}
#include <WebKit/WKImageCG.h>
#include <wtf/MD5.h>
#include <wtf/RetainPtr.h>
-#include <wtf/StringExtras.h>
#if PLATFORM(MAC) && !PLATFORM(IOS)
#include <LaunchServices/UTCoreTypes.h>