2 * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "TextCodec.h"
30 #include "PlatformString.h"
31 #include <wtf/StringExtras.h>
// Unicode byte order mark (U+FEFF). appendOmittingBOM() compares each UChar
// against this value to decide which characters to leave out.
35 const UChar BOM = 0xFEFF;
// Out-of-line definition of the TextCodec destructor.
37 TextCodec::~TextCodec()
41 // We strip BOM characters because they can show up both at the start of content
42 // and inside content, and we never want them to end up in the decoded text.
// Appends the `length` UChars starting at `characters` to `v`, leaving out
// every element equal to BOM. Text between BOMs is appended in bulk, one
// v.append() call per run, rather than one character at a time.
// NOTE(review): the declaration and advancement of `start` are not visible in
// this excerpt; presumably it marks the beginning of the run that has not been
// appended yet -- confirm against the full file.
43 void TextCodec::appendOmittingBOM(Vector<UChar>& v, const UChar* characters, size_t length)
46 for (size_t i = 0; i != length; ++i) {
47 if (BOM == characters[i]) {
// Flush the run [start, i) that precedes this BOM.
49 v.append(&characters[start], i - start);
// Flush the trailing run [start, length).
54 v.append(&characters[start], length - start);
// Writes into `replacement` the text to emit in place of `codePoint`, in the
// form selected by `handling`. For the cases visible here the return value is
// strlen(replacement), i.e. the length of that text without the terminating
// NUL. Each snprintf() call is bounded by sizeof(UnencodableReplacementArray).
57 int TextCodec::getUnencodableReplacement(unsigned codePoint, UnencodableHandling handling, UnencodableReplacementArray replacement)
// NOTE(review): the body of this case is not visible in this excerpt;
// presumably it produces a single '?' -- confirm against the full file.
60 case QuestionMarksForUnencodables:
// Decimal numeric character reference, e.g. 8364 -> "&#8364;".
64 case EntitiesForUnencodables:
65 snprintf(replacement, sizeof(UnencodableReplacementArray), "&#%u;", codePoint);
66 return static_cast<int>(strlen(replacement));
// The same reference with '&', '#' and ';' percent-encoded,
// e.g. 8364 -> "%26%238364%3B".
67 case URLEncodedEntitiesForUnencodables:
68 snprintf(replacement, sizeof(UnencodableReplacementArray), "%%26%%23%u%%3B", codePoint);
69 return static_cast<int>(strlen(replacement));
76 } // namespace WebCore