1 // -*- c-basic-offset: 2 -*-
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
5 * Copyright (C) 2004 Apple Computer, Inc.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
39 #include "operations.h"
40 #include "identifier.h"
46 #include <unicode/uchar.h>
// Local prototype for the allocator's size-rounding helper; it is used further
// down when string buffer capacities are computed.
48 // malloc_good_size is not prototyped anywhere!
50 size_t malloc_good_size(size_t size);
// Floating-point constants that are defined in another translation unit.
57 extern const double NaN;
58 extern const double Inf;
// Builds a CString from the C string `c`: allocates length + 1 bytes and copies
// that many bytes from `c`.
// NOTE(review): `length` is presumably strlen(c), which would make the extra
// byte the terminating NUL — confirm.
60 CString::CString(const char *c)
63 data = new char[length+1];
64 memcpy(data, c, length + 1);
// Builds a CString from a pointer and an explicit byte count. The buffer is
// allocated one byte larger than `len`.
67 CString::CString(const char *c, int len)
70 data = new char[len+1];
// Copy constructor. A non-empty source that has a buffer is deep-copied: a new
// buffer of length + 1 bytes is allocated and filled from b.data.
75 CString::CString(const CString &b)
78 if (length > 0 && b.data) {
79 data = new char[length+1];
80 memcpy(data, b.data, length + 1);
// Appends `t` to this string by way of a newly allocated buffer `n` that is
// sized for both strings plus one extra byte.
92 CString &CString::append(const CString &t)
95 n = new char[length+t.length+1];
// The existing contents go first...
97 memcpy(n, data, length);
// ...followed directly by t's bytes.
99 memcpy(n+length, t.data, t.length);
// Assigns from the C string `c`: allocates length + 1 bytes and copies that
// many bytes from `c`.
109 CString &CString::operator=(const char *c)
114 data = new char[length+1];
115 memcpy(data, c, length + 1);
// Copy assignment. When the source is non-empty and has a buffer, a new buffer
// of length + 1 bytes is allocated and filled from str.data.
120 CString &CString::operator=(const CString &str)
128 if (length > 0 && str.data) {
129 data = new char[length + 1];
130 memcpy(data, str.data, length + 1);
// Equality for CStrings: the sizes must match and, unless the length is zero,
// the bytes must compare equal under memcmp.
139 bool KJS::operator==(const KJS::CString& c1, const KJS::CString& c2)
142 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
// Statically initialised Rep instances for the null and the empty string. They
// differ only in the buffer member: the empty Rep points at `dummy`.
146 UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
147 UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, 0, &dummy, 0, 0, 0, 0 };
// Minimum size and bookkeeping for the static conversion buffer that
// UString::ascii() fills and UString::globalClear() frees.
148 const int normalStatBufferSize = 4096;
149 static char *statBuffer = 0;
150 static int statBufferSize = 0;
// Returns the lower-case form of this character. Two conversions are visible:
// one through ICU's u_tolower, and one through the C library that is limited
// to code units below 256 (presumably selected at build time — confirm).
152 UChar UChar::toLower() const
155 return static_cast<unsigned short>(u_tolower(uc));
157 // ### properly support unicode tolower
// Code units >= 256, or ones that are already lower-case, take this branch
// instead of being passed to tolower().
158 if (uc >= 256 || islower(uc))
161 return (unsigned char)tolower(uc);
// Returns the upper-case form of this character. Two conversions are visible:
// one through ICU's u_toupper, and one through the C library that is limited
// to code units below 256 (presumably selected at build time — confirm).
165 UChar UChar::toUpper() const
168 return static_cast<unsigned short>(u_toupper(uc));
// Code units >= 256, or ones that are already upper-case, take this branch
// instead of being passed to toupper().
170 if (uc >= 256 || isupper(uc))
173 return (unsigned char)toupper(uc);
// Writes `c` into the referenced string at this reference's offset. The store
// only happens when the offset lies inside the string's current length.
177 UCharReference& UCharReference::operator=(UChar c)
180 if (offset < str->rep->len)
181 *(str->rep->data() + offset) = c;
182 /* TODO: lengthen string ? */
// Returns a reference to the character at this reference's offset when that
// offset lies inside the string.
186 UChar& UCharReference::ref() const
188 if (offset < str->rep->len)
189 return *(str->rep->data() + offset);
// Out-of-range fallback: one function-local static NUL character shared by
// every caller. As its name says, it must not be written through.
191 static UChar callerBetterNotModifyThis('\0');
192 return callerBetterNotModifyThis;
// Creates a Rep for the character buffer `d` holding `l` characters.
196 UString::Rep *UString::Rep::create(UChar *d, int l)
// A Rep built this way starts with no used pre-capacity.
208 r->usedPreCapacity = 0;
// Creates a Rep that describes a slice of `base`, starting `offset` characters
// from base's own start and spanning `length` characters.
213 UString::Rep *UString::Rep::create(Rep *base, int offset, int length)
217 int baseOffset = base->offset;
// If `base` is itself a slice, re-target to the string it slices so that the
// new Rep refers to that string directly.
219 if (base->baseString) {
220 base = base->baseString;
// The slice has to lie inside the part of the base buffer that is in use, both
// before (pre-capacity) and after (capacity) the buffer origin.
223 assert(-(offset + baseOffset) <= base->usedPreCapacity);
224 assert(offset + baseOffset + length <= base->usedCapacity);
// The stored offset combines the requested offset with the original base's.
227 r->offset = baseOffset + offset;
232 r->baseString = base;
237 r->usedPreCapacity = 0;
// Tears down this Rep. The visible step unregisters it via Identifier::remove.
// NOTE(review): presumably only done for Reps registered as identifiers — confirm.
242 void UString::Rep::destroy()
245 Identifier::remove(this);
254 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
255 // or anything like that.
256 const unsigned PHI = 0x9e3779b9U;
// Hashes a UChar string while looking at no more than 16 characters: up to the
// first 8, plus the tail starting at suffixPosition.
258 // This hash algorithm comes from:
259 // http://burtleburtle.net/bob/hash/hashfaq.html
260 // http://burtleburtle.net/bob/hash/doobs.html
261 unsigned UString::Rep::computeHash(const UChar *s, int length)
263 int prefixLength = length < 8 ? length : 8;
// Below 16 characters the tail starts right after the prefix (index 8), so no
// character is visited twice; for longer strings it is the final 8 characters.
264 int suffixPosition = length < 16 ? 8 : length - 8;
271 for (int i = 0; i < prefixLength; i++) {
276 for (int i = suffixPosition; i < length; i++){
// Hashes a NUL-terminated 8-bit string with the same prefix/suffix sampling as
// the UChar overload; each byte is added as an unsigned value.
292 // This hash algorithm comes from:
293 // http://burtleburtle.net/bob/hash/hashfaq.html
294 // http://burtleburtle.net/bob/hash/doobs.html
295 unsigned UString::Rep::computeHash(const char *s)
297 int length = strlen(s);
298 int prefixLength = length < 8 ? length : 8;
// Below 16 characters the tail starts at index 8; otherwise it is the last 8.
299 int suffixPosition = length < 16 ? 8 : length - 8;
306 for (int i = 0; i < prefixLength; i++) {
307 h += (unsigned char)s[i];
311 for (int i = suffixPosition; i < length; i++) {
312 h += (unsigned char)s[i];
327 // put these early so they can be inlined
// Computes a buffer capacity, in UChars, for `size` characters plus
// `otherSize`: `size` is grown by about 10% (+1) before `otherSize` is added.
328 inline int UString::expandedSize(int size, int otherSize) const
330 int s = (size * 11 / 10) + 1 + otherSize;
// Convert to bytes, pass through malloc_good_size, and convert back to UChars.
332 s = malloc_good_size(s * sizeof(UChar)) / sizeof(UChar);
// Returns usedCapacity of the Rep that owns the buffer: the base string's when
// this string is a slice, otherwise this string's own.
337 inline int UString::usedCapacity() const
339 return rep->baseString ? rep->baseString->usedCapacity : rep->usedCapacity;
// Returns usedPreCapacity of the Rep that owns the buffer: the base string's
// when this string is a slice, otherwise this string's own.
342 inline int UString::usedPreCapacity() const
344 return rep->baseString ? rep->baseString->usedPreCapacity : rep->usedPreCapacity;
// Makes sure the buffer-owning Rep can hold `requiredLength` characters and
// records that many as used.
347 void UString::expandCapacity(int requiredLength)
// Work on the base string's Rep when this string is a slice.
349 Rep *r = rep->baseString ? rep->baseString : rep;
351 if (requiredLength > r->capacity) {
// The allocation covers the pre-capacity as well, hence it is passed in here
// and subtracted again when the new capacity is stored.
352 int newCapacity = expandedSize(requiredLength, r->preCapacity);
353 r->buf = static_cast<UChar *>(realloc(r->buf, newCapacity * sizeof(UChar)));
354 r->capacity = newCapacity - r->preCapacity;
// usedCapacity only ever grows here.
356 if (requiredLength > r->usedCapacity) {
357 r->usedCapacity = requiredLength;
// Makes sure the buffer-owning Rep has room for `requiredPreCap` characters in
// front of the string data and records that many as used.
361 void UString::expandPreCapacity(int requiredPreCap)
// Work on the base string's Rep when this string is a slice.
363 Rep *r = rep->baseString ? rep->baseString : rep;
365 if (requiredPreCap > r->preCapacity) {
366 int newCapacity = expandedSize(requiredPreCap, r->capacity);
// delta is how many characters larger the new allocation is than the old one.
367 int delta = newCapacity - r->capacity - r->preCapacity;
// Unlike expandCapacity this cannot realloc in place: the old contents are
// copied `delta` characters into the new buffer, leaving the new room in front.
369 UChar *newBuf = static_cast<UChar *>(malloc(newCapacity * sizeof(UChar)));
370 memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
374 r->preCapacity = newCapacity - r->capacity;
// usedPreCapacity only ever grows here.
376 if (requiredPreCap > r->usedPreCapacity) {
377 r->usedPreCapacity = requiredPreCap;
// Builds a one-character string: a single-UChar buffer is malloc'ed and wrapped
// in a Rep of length 1.
387 UString::UString(char c)
389 UChar *d = static_cast<UChar *>(malloc(sizeof(UChar)));
391 rep = Rep::create(d, 1);
// Builds a string from the NUL-terminated 8-bit string `c`: one UChar is
// allocated per byte, the loop fills them in, and the buffer is wrapped in a Rep.
394 UString::UString(const char *c)
400 int length = strlen(c);
405 UChar *d = static_cast<UChar *>(malloc(sizeof(UChar) * length));
406 for (int i = 0; i < length; i++)
408 rep = Rep::create(d, length);
// Builds a string from `length` UChars at `c`. The characters are copied into a
// newly malloc'ed buffer, so the caller's array is not retained.
411 UString::UString(const UChar *c, int length)
417 UChar *d = static_cast<UChar *>(malloc(sizeof(UChar) *length));
418 memcpy(d, c, length * sizeof(UChar));
419 rep = Rep::create(d, length);
// Builds a string from `length` UChars at `c`. The visible path allocates a new
// buffer and copies the characters into it.
// NOTE(review): presumably `copy == false` adopts the caller's buffer instead —
// confirm against the full definition.
422 UString::UString(UChar *c, int length, bool copy)
430 d = static_cast<UChar *>(malloc(sizeof(UChar) * length));
431 memcpy(d, c, length * sizeof(UChar));
434 rep = Rep::create(d, length);
// Concatenating constructor: builds the string a + b. Where possible the result
// shares a's buffer (b is appended in place) or b's buffer (a is prepended in
// place); otherwise both are copied into a fresh buffer.
437 UString::UString(const UString &a, const UString &b)
439 int aSize = a.size();
440 int aOffset = a.rep->offset;
441 int bSize = b.size();
442 int bOffset = b.rep->offset;
443 int length = aSize + bSize;
450 } else if (bSize == 0) {
453 } else if (aOffset + aSize == a.usedCapacity() && 4 * aSize >= bSize &&
454 (-bOffset != b.usedPreCapacity() || aSize >= bSize)) {
455 // - a reaches the end of its buffer so it qualifies for shared append
456 // - also, it's at least a quarter the length of b - appending to a much shorter
457 // string does more harm than good
458 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
460 x.expandCapacity(aOffset + length);
// b's characters are written directly behind a's inside a's buffer; the result
// is a slice of a's Rep covering both.
461 memcpy(const_cast<UChar *>(a.data() + aSize), b.data(), bSize * sizeof(UChar));
462 rep = Rep::create(a.rep, 0, length);
463 } else if (-bOffset == b.usedPreCapacity() && 4 * bSize >= aSize) {
464 // - b reaches the beginning of its buffer so it qualifies for shared prepend
465 // - also, it's at least a quarter the length of a - prepending to a much shorter
466 // string does more harm than good
468 y.expandPreCapacity(-bOffset + aSize);
// a's characters are written directly in front of b's inside b's buffer; the
// result is a slice of b's Rep that starts aSize characters earlier.
469 memcpy(const_cast<UChar *>(b.data() - aSize), a.data(), aSize * sizeof(UChar));
470 rep = Rep::create(b.rep, -aSize, length);
472 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
473 int newCapacity = expandedSize(length, 0);
474 UChar *d = static_cast<UChar *>(malloc(sizeof(UChar) * newCapacity));
475 memcpy(d, a.data(), aSize * sizeof(UChar));
476 memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
477 rep = Rep::create(d, length);
// Rep::create was given only the length, so record the real allocation size.
478 rep->capacity = newCapacity;
// Returns a reference to a shared UString (presumably the null string — confirm).
482 const UString &UString::null()
// Formats an int by widening it to long and delegating to from(long).
488 UString UString::from(int i)
490 return from((long)i);
// Formats an unsigned int in decimal. Digits are produced least-significant
// first and stored backwards from the end of a local buffer, so the finished
// text is the range [p, end).
493 UString UString::from(unsigned int u)
496 UChar *end = buf + 20;
503 *--p = (unsigned short)((u % 10) + '0');
508 return UString(p, end - p);
// Formats a long in decimal. Digits are stored backwards from the end of a
// local buffer, so the finished text is the range [p, end).
511 UString UString::from(long l)
514 UChar *end = buf + 20;
// LONG_MIN is special-cased and formatted through sprintf (presumably because
// it cannot be negated like other negative values — confirm).
519 } else if (l == LONG_MIN) {
521 sprintf(minBuf, "%ld", LONG_MIN);
522 return UString(minBuf);
524 bool negative = false;
530 *--p = (unsigned short)((l % 10) + '0');
538 return UString(p, end - p);
// Formats a double. kjs_dtoa supplies the digit string together with the
// decimal point position and the sign; the digits are then laid out in `buf` in
// one of several forms chosen by decimalPoint.
541 UString UString::from(double d)
547 char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, NULL);
548 int length = strlen(result);
// Small magnitudes (decimal point at or just left of the digits): the loop runs
// once per position between the point and the first digit, then the digits follow.
555 if (decimalPoint <= 0 && decimalPoint > -6) {
558 for (int j = decimalPoint; j < 0; j++) {
561 strcpy(buf + i, result);
// Decimal point within 21 places: plain positional notation.
562 } else if (decimalPoint <= 21 && decimalPoint > 0) {
563 if (length <= decimalPoint) {
// All digits are integral; the loop runs once per missing place up to the point.
564 strcpy(buf + i, result);
566 for (int j = 0; j < decimalPoint - length; j++) {
// Otherwise the digits are split at the decimal point.
571 strncpy(buf + i, result, decimalPoint);
574 strcpy(buf + i, result + decimalPoint);
// A result that does not start with a digit is copied through unchanged.
576 } else if (result[0] < '0' || result[0] > '9') {
577 strcpy(buf + i, result);
// Exponential notation: first digit, remaining digits, then a signed exponent.
579 buf[i++] = result[0];
582 strcpy(buf + i, result + 1);
587 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
588 // decimalPoint can't be more than 3 digits decimal given the
589 // nature of float representation
590 int exponential = decimalPoint - 1;
591 if (exponential < 0) {
592 exponential = exponential * -1;
// Emit the exponent's hundreds, tens and units digits, skipping leading zeros.
594 if (exponential >= 100) {
595 buf[i++] = '0' + exponential / 100;
597 if (exponential >= 10) {
598 buf[i++] = '0' + (exponential % 100) / 10;
600 buf[i++] = '0' + exponential % 10;
// The digit string came from kjs_dtoa and is handed back to kjs_freedtoa.
604 kjs_freedtoa(result);
// Appends `t` to this string, reusing the current buffer when that is safe and
// falling back to a fresh allocation otherwise.
609 UString &UString::append(const UString &t)
611 int thisSize = size();
612 int thisOffset = rep->offset;
613 int tSize = t.size();
614 int length = thisSize + tSize;
620 } else if (tSize == 0) {
622 } else if (!rep->baseString && rep->rc == 1) {
623 // this is direct and has refcount of 1 (so we can just alter it directly)
624 expandCapacity(thisOffset + length);
625 memcpy(const_cast<UChar *>(data() + thisSize), t.data(), tSize * sizeof(UChar));
628 } else if (thisOffset + thisSize == usedCapacity()) {
629 // this reaches the end of the buffer - extend it
630 expandCapacity(thisOffset + length);
631 memcpy(const_cast<UChar *>(data() + thisSize), t.data(), tSize * sizeof(UChar));
// The buffer is shared, so the longer string is expressed as a new slice Rep
// over the same buffer.
632 Rep *newRep = Rep::create(rep, 0, length);
636 // this is shared with someone using more capacity, gotta make a whole new string
637 int newCapacity = expandedSize(length, 0);
638 UChar *d = static_cast<UChar *>(malloc(sizeof(UChar) * newCapacity));
639 memcpy(d, data(), thisSize * sizeof(UChar));
640 memcpy(const_cast<UChar *>(d + thisSize), t.data(), tSize * sizeof(UChar));
642 rep = Rep::create(d, length);
// Rep::create was given only the length, so record the real allocation size.
643 rep->capacity = newCapacity;
// Appends the NUL-terminated 8-bit string `t`. The cases mirror
// append(const UString &), except that the bytes are widened to UChar one at a
// time instead of being memcpy'd.
649 UString &UString::append(const char *t)
651 int thisSize = size();
652 int thisOffset = rep->offset;
653 int tSize = strlen(t);
654 int length = thisSize + tSize;
660 } else if (tSize == 0) {
661 // t is empty, we'll just return *this below.
662 } else if (!rep->baseString && rep->rc == 1) {
663 // this is direct and has refcount of 1 (so we can just alter it directly)
664 expandCapacity(thisOffset + length);
665 UChar *d = const_cast<UChar *>(data());
666 for (int i = 0; i < tSize; ++i)
667 d[thisSize+i] = t[i];
670 } else if (thisOffset + thisSize == usedCapacity()) {
671 // this string reaches the end of the buffer - extend it
672 expandCapacity(thisOffset + length);
673 UChar *d = const_cast<UChar *>(data());
674 for (int i = 0; i < tSize; ++i)
675 d[thisSize+i] = t[i];
// The buffer is shared, so the longer string is expressed as a new slice Rep
// over the same buffer.
676 Rep *newRep = Rep::create(rep, 0, length);
680 // this is shared with someone using more capacity, gotta make a whole new string
681 int newCapacity = expandedSize(length, 0);
682 UChar *d = static_cast<UChar *>(malloc(sizeof(UChar) * newCapacity));
683 memcpy(d, data(), thisSize * sizeof(UChar));
684 for (int i = 0; i < tSize; ++i)
685 d[thisSize+i] = t[i];
687 rep = Rep::create(d, length);
// Rep::create was given only the length, so record the real allocation size.
688 rep->capacity = newCapacity;
694 UString &UString::append(unsigned short c)
696 int thisOffset = rep->offset;
701 // this is empty - must make a new rep because we don't want to pollute the shared empty one
702 int newCapacity = expandedSize(1, 0);
703 UChar *d = static_cast<UChar *>(malloc(sizeof(UChar) * newCapacity));
706 rep = Rep::create(d, 1);
707 rep->capacity = newCapacity;
708 } else if (!rep->baseString && rep->rc == 1) {
709 // this is direct and has refcount of 1 (so we can just alter it directly)
710 expandCapacity(thisOffset + length + 1);
711 UChar *d = const_cast<UChar *>(data());
713 rep->len = length + 1;
715 } else if (thisOffset + length == usedCapacity()) {
716 // this reaches the end of the string - extend it and share
717 expandCapacity(thisOffset + length + 1);
718 UChar *d = const_cast<UChar *>(data());
720 Rep *newRep = Rep::create(rep, 0, length + 1);
724 // this is shared with someone using more capacity, gotta make a whole new string
725 int newCapacity = expandedSize((length + 1), 0);
726 UChar *d = static_cast<UChar *>(malloc(sizeof(UChar) * newCapacity));
727 memcpy(d, data(), length * sizeof(UChar));
730 rep = Rep::create(d, length);
731 rep->capacity = newCapacity;
// Returns this string as a CString (presumably an 8-bit rendering — confirm).
737 CString UString::cstring() const
// Returns an 8-bit rendering of this string held in the file-level static
// buffer `statBuffer`. Because that buffer is shared, a later call can
// overwrite or reallocate the storage returned by an earlier one.
742 char *UString::ascii() const
744 // Never make the buffer smaller than normalStatBufferSize.
745 // Thus we almost never need to reallocate.
// One extra byte beyond the characters (presumably for a terminating NUL).
747 int neededSize = length + 1;
748 if (neededSize < normalStatBufferSize) {
749 neededSize = normalStatBufferSize;
// Reallocate whenever the wanted size differs from the current one; this also
// shrinks the buffer back after an unusually long string.
751 if (neededSize != statBufferSize) {
752 delete [] statBuffer;
753 statBuffer = new char [neededSize];
754 statBufferSize = neededSize;
// Source range [p, limit) and destination cursor q for the conversion.
757 const UChar *p = data();
758 char *q = statBuffer;
759 const UChar *limit = p + length;
// Frees the static buffer that ascii() allocates.
771 void UString::globalClear()
773 delete [] statBuffer;
// Assigns from the 8-bit string `c`; a null pointer is treated as length 0.
779 UString &UString::operator=(const char *c)
781 int l = c ? strlen(c) : 0;
// The existing buffer is only considered for reuse when this Rep is unshared,
// direct, starts at the buffer origin with no pre-capacity, and is large enough.
783 if (rep->rc == 1 && l <= rep->capacity && !rep->baseString && rep->offset == 0 && rep->preCapacity == 0) {
// Otherwise a buffer of exactly l UChars is allocated and wrapped in a new Rep.
788 d = static_cast<UChar *>(malloc(sizeof(UChar) * l));
789 rep = Rep::create(d, l);
791 for (int i = 0; i < l; i++)
// Copy assignment from another UString.
797 UString &UString::operator=(const UString &str)
// Examines the characters in [data(), data() + size()).
// NOTE(review): presumably reports whether every code unit fits in 8 bits — confirm.
806 bool UString::is8Bit() const
808 const UChar *u = data();
809 const UChar *limit = u + size();
// Read-only indexed access: yields the character at `pos` by value.
819 UChar UString::operator[](int pos) const
// Indexed access on a non-const string: returns a UCharReference proxy bound to
// this string and `pos`. `pos` is not range-checked here.
826 UCharReference UString::operator[](int pos)
828 /* TODO: boundary check */
829 return UCharReference(this, pos);
// Parses this string as a number, working on the 8-bit text from ascii().
// Accepted forms visible here: a 0x/0X-prefixed hexadecimal literal, a decimal
// number parsed by kjs_strtod, and the word "Infinity" (a '-' sign is handled
// ahead of it). Leading and trailing white space is allowed.
//   tolerateTrailingJunk - when false, anything left after the number (and
//                          trailing white space) makes the parse fail.
//   tolerateEmptyString  - selects 0.0 rather than NaN for an empty input.
832 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
836 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
837 // after the number, so is8Bit is too strict a check.
841 const char *c = ascii();
843 // skip leading white space
849 return tolerateEmptyString ? 0.0 : NaN;
852 if (*c == '0' && (*(c+1) == 'x' || *(c+1) == 'X')) {
// Accumulate hex digits; `& 0xdf` clears the ASCII case bit so 'a'-'f' are
// handled like 'A'-'F'.
856 if (*c >= '0' && *c <= '9')
857 d = d * 16.0 + *c - '0';
858 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
859 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
866 d = kjs_strtod(c, &end);
// Take the kjs_strtod result only if it consumed input (or produced a non-zero
// value) and did not overflow to +/-HUGE_VAL.
867 if ((d != 0.0 || end != c) && d != HUGE_VAL && d != -HUGE_VAL) {
874 else if (*c == '-') {
878 if (strncmp(c, "Infinity", 8) != 0)
885 // allow trailing white space
888 // don't allow anything after - unless tolerant=true
889 if (!tolerateTrailingJunk && *c != '\0')
// Convenience overload: same as the two-argument form with empty strings tolerated.
895 double UString::toDouble(bool tolerateTrailingJunk) const
897 return toDouble(tolerateTrailingJunk, true);
// Convenience overload: trailing junk is not tolerated, empty strings are.
900 double UString::toDouble() const
902 return toDouble(false, true);
// Converts this string to an unsigned long by going through toDouble() with
// trailing junk disallowed.
905 unsigned long UString::toULong(bool *ok, bool tolerateEmptyString) const
907 double d = toDouble(false, tolerateEmptyString);
// NaN, or a value that does not survive the round trip through unsigned long,
// is treated as a failed conversion.
910 if (isNaN(d) || d != static_cast<unsigned long>(d)) {
918 return static_cast<unsigned long>(d);
// Convenience overload: same as the two-argument form with empty strings tolerated.
921 unsigned long UString::toULong(bool *ok) const
923 return toULong(ok, true);
// Converts this string to a uint32_t by going through toDouble().
926 uint32_t UString::toUInt32(bool *ok) const
928 double d = toDouble();
// NaN, or a value that does not survive the round trip through uint32_t, is
// treated as a failed conversion.
931 if (isNaN(d) || d != static_cast<uint32_t>(d)) {
939 return static_cast<uint32_t>(d);
// Strict decimal parse to a 32-bit unsigned value. It works directly on the
// UChar data, one character at a time, and applies the restrictions listed in
// the comments below (no empty string, no leading zeros, digits only, no
// overflow past 32 bits).
942 uint32_t UString::toStrictUInt32(bool *ok) const
947 // Empty string is not OK.
951 const UChar *p = rep->data();
952 unsigned short c = p->unicode();
954 // If the first digit is 0, only 0 itself is OK.
961 // Convert to UInt32, checking for overflow.
964 // Process character, turning it into a digit.
965 if (c < '0' || c > '9')
967 const unsigned d = c - '0';
969 // Multiply by 10, checking for overflow out of 32 bits.
970 if (i > 0xFFFFFFFFU / 10)
974 // Add in the digit, checking for overflow out of 32 bits.
975 const unsigned max = 0xFFFFFFFFU - d;
980 // Handle end of string.
987 // Get next character.
988 c = (++p)->unicode();
992 // Rule from ECMA 15.2 about what an array index is.
993 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
// Implemented on top of toStrictUInt32(); `ok` may be null, which is why it is
// tested before the 2^32 - 1 case is handled.
994 unsigned UString::toArrayIndex(bool *ok) const
996 unsigned i = toStrictUInt32(ok);
997 if (i >= 0xFFFFFFFFU && ok)
// Forward search for the substring `f`, starting at index `pos`.
1002 int UString::find(const UString &f, int pos) const
// Last position at which a match of length fsz could still begin.
1012 const UChar *end = data() + sz - fsz;
// Byte count of everything in `f` after its first character.
1013 long fsizeminusone = (fsz - 1) * sizeof(UChar);
1014 const UChar *fdata = f.data();
1015 for (const UChar *c = data() + pos; c <= end; c++)
// Compare the first character directly, then memcmp the remainder.
1016 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
// Forward search for the single character `ch`, scanning from index `pos` up to
// the end of the string.
1022 int UString::find(UChar ch, int pos) const
1026 const UChar *end = data() + size();
1027 for (const UChar *c = data() + pos; c < end; c++)
// Backward search for the substring `f`: candidate positions are tried from
// index `pos` down to the start of the string.
1034 int UString::rfind(const UString &f, int pos) const
// Byte count of everything in `f` after its first character.
1046 long fsizeminusone = (fsz - 1) * sizeof(UChar);
1047 const UChar *fdata = f.data();
1048 for (const UChar *c = data() + pos; c >= data(); c--) {
// Compare the first character directly, then memcmp the remainder.
1049 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
// Backward search for the single character `ch`, scanning from index `pos` down
// to the start of the string.
1056 int UString::rfind(UChar ch, int pos) const
// Detects a start position at or beyond the last character.
1060 if (pos + 1 >= size())
1062 for (const UChar *c = data() + pos; c >= data(); c--) {
// Returns the substring of `len` characters starting at `pos`. The result is
// built from a slice Rep over this string's Rep, so no characters are copied
// on the path visible here.
1070 UString UString::substr(int pos, int len) const
// The request covers the whole string.
1083 if (pos == 0 && len == s)
1086 UString::Rep *newRep = Rep::create(rep, pos, len);
1087 UString result(newRep);
// Makes this string use the Rep `r` (presumably taking a reference on it — confirm).
1093 void UString::attach(Rep *r)
// Gives this string a private buffer. When the Rep is shared (rc > 1) or is a
// slice of another string, the characters are copied into a newly malloc'ed
// buffer and a new Rep is created for it.
1099 void UString::detach()
1101 if (rep->rc > 1 || rep->baseString) {
1103 UChar *n = static_cast<UChar *>(malloc(sizeof(UChar) * l));
1104 memcpy(n, data(), l * sizeof(UChar));
1106 rep = Rep::create(n, l);
// Gives up this string's hold on its current Rep (presumably dropping a
// reference — confirm). Callers in this file invoke it before installing a new Rep.
1110 void UString::release()
// Equality for UStrings: the lengths are compared first, then the character
// data is compared bytewise with memcmp.
1115 bool KJS::operator==(const UString& s1, const UString& s2)
1117 if (s1.rep->len != s2.rep->len)
1120 return (memcmp(s1.rep->data(), s2.rep->data(),
1121 s1.rep->len * sizeof(UChar)) == 0);
// Compares a UString with a NUL-terminated 8-bit string, treating each byte of
// s2 as an unsigned code unit.
1124 bool KJS::operator==(const UString& s1, const char *s2)
1127 return s1.isEmpty();
1130 const UChar *u = s1.data();
1131 const UChar *uend = u + s1.size();
// Walk both strings in step until either one runs out.
1132 while (u != uend && *s2) {
1133 if (u->uc != (unsigned char)*s2)
// Equal only if both strings were exhausted at the same time.
1139 return u == uend && *s2 == 0;
// Ordering for UStrings by code unit value: the strings are compared over their
// common prefix length, and the first differing character decides.
1142 bool KJS::operator<(const UString& s1, const UString& s2)
1144 const int l1 = s1.size();
1145 const int l2 = s2.size();
1146 const int lmin = l1 < l2 ? l1 : l2;
1147 const UChar *c1 = s1.data();
1148 const UChar *c2 = s2.data();
// Skip the characters the two strings have in common.
1150 while (l < lmin && *c1 == *c2) {
1156 return (c1->uc < c2->uc);
1161 int KJS::compare(const UString& s1, const UString& s2)
1163 const int l1 = s1.size();
1164 const int l2 = s2.size();
1165 const int lmin = l1 < l2 ? l1 : l2;
1166 const UChar *c1 = s1.data();
1167 const UChar *c2 = s2.data();
1169 while (l < lmin && *c1 == *c2) {
1175 return (c1->uc > c2->uc) ? 1 : -1;
1180 return (l1 < l2) ? 1 : -1;
// Classifies a UTF-8 lead byte that has its high bit set. Returns the length
// (2, 3 or 4) of the sequence it starts, or 0 when the byte is a continuation
// byte or otherwise not a legal lead byte.
inline int inlineUTF8SequenceLengthNonASCII(char b0)
{
  if ((b0 & 0xC0) != 0xC0)
    return 0;
  if ((b0 & 0xE0) == 0xC0)
    return 2;
  if ((b0 & 0xF0) == 0xE0)
    return 3;
  if ((b0 & 0xF8) == 0xF0)
    return 4;
  return 0;
}

// Out-of-line entry point for the classification above.
int UTF8SequenceLengthNonASCII(char b0)
{
  return inlineUTF8SequenceLengthNonASCII(b0);
}

// Inline variant of UTF8SequenceLength: ASCII bytes are 1-byte sequences and
// everything else goes through the inline non-ASCII classifier, so the whole
// computation can be inlined at the call site.
inline int inlineUTF8SequenceLength(char b0)
{
  return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
}

// Given a first byte, gives the length of the UTF-8 sequence it begins.
// Returns 0 for bytes that are not legal starts of UTF-8 sequences.
// Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF).
int UTF8SequenceLength(char b0)
{
  return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
}

// Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character.
// Only allows Unicode characters (U-00000000 to U-0010FFFF).
// Returns -1 if the sequence is not valid (including presence of extra bytes).
//
// For a sequence of length N, the byte at index N is read as well and must be
// the terminating NUL; each stage below reads one further byte only after the
// previous one has been validated as a continuation byte.
int decodeUTF8Sequence(const char *sequence)
{
  // Handle 0-byte sequences (never valid).
  const unsigned char b0 = sequence[0];
  const int length = inlineUTF8SequenceLength(b0);
  if (length == 0)
    return -1;

  // Handle 1-byte sequences (plain ASCII).
  const unsigned char b1 = sequence[1];
  if (length == 1) {
    if (b1)
      return -1;
    return b0;
  }

  // Handle 2-byte sequences.
  if ((b1 & 0xC0) != 0x80)
    return -1;
  const unsigned char b2 = sequence[2];
  if (length == 2) {
    if (b2)
      return -1;
    const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F);
    // Reject overlong encodings of values that fit in one byte.
    if (c < 0x80)
      return -1;
    return c;
  }

  // Handle 3-byte sequences.
  if ((b2 & 0xC0) != 0x80)
    return -1;
  const unsigned char b3 = sequence[3];
  if (length == 3) {
    if (b3)
      return -1;
    const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F);
    // Reject overlong encodings of values that fit in two bytes.
    if (c < 0x800)
      return -1;
    // UTF-16 surrogates should never appear in UTF-8 data.
    if (c >= 0xD800 && c <= 0xDFFF)
      return -1;
    // Backwards BOM and U+FFFF should never appear in UTF-8 data.
    if (c == 0xFFFE || c == 0xFFFF)
      return -1;
    return c;
  }

  // Handle 4-byte sequences.
  if ((b3 & 0xC0) != 0x80)
    return -1;
  const unsigned char b4 = sequence[4];
  if (length == 4) {
    if (b4)
      return -1;
    const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
    // Reject overlong encodings and values beyond the Unicode range.
    if (c < 0x10000 || c > 0x10FFFF)
      return -1;
    return c;
  }

  return -1;
}
1281 CString UString::UTF8String() const
1283 // Allocate a buffer big enough to hold all the characters.
1284 const int length = size();
1285 const unsigned bufferSize = length * 3;
1286 char fixedSizeBuffer[1024];
1288 if (bufferSize > sizeof(fixedSizeBuffer)) {
1289 buffer = new char [bufferSize];
1291 buffer = fixedSizeBuffer;
1294 // Convert to runs of 8-bit characters.
1296 const UChar *d = data();
1297 for (int i = 0; i != length; ++i) {
1298 unsigned short c = d[i].unicode();
1301 } else if (c < 0x800) {
1302 *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8
1303 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
1304 } else if (c >= 0xD800 && c <= 0xDBFF && i < length && d[i+1].uc >= 0xDC00 && d[i+2].uc <= 0xDFFF) {
1305 unsigned sc = 0x10000 + (((c & 0x3FF) << 10) | (d[i+1].uc & 0x3FF));
1306 *p++ = (char)((sc >> 18) | 0xF0); // F0 is the 4-byte flag for UTF-8
1307 *p++ = (char)(((sc >> 12) | 0x80) & 0xBF); // next 6 bits, with high bit set
1308 *p++ = (char)(((sc >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
1309 *p++ = (char)((sc | 0x80) & 0xBF); // next 6 bits, with high bit set
1312 *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8
1313 *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
1314 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
1318 // Return the result as a C string.
1319 CString result(buffer, p - buffer);
1320 if (buffer != fixedSizeBuffer) {