2 * Copyright (C) 2014-2016 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
29 #include <unicode/utypes.h>
30 #include <wtf/Forward.h>
31 #include <wtf/RetainPtr.h>
32 #include <wtf/Vector.h>
33 #include <wtf/text/CString.h>
34 #include <wtf/text/ConversionMode.h>
35 #include <wtf/text/LChar.h>
36 #include <wtf/text/StringCommon.h>
38 // FIXME: Enabling the StringView lifetime checking causes the MSVC build to fail. Figure out why.
39 // FIXME: Enable StringView lifetime checking once the underlying assertions have been fixed.
// CHECK_STRINGVIEW_LIFETIME gates the lifetime-tracking code further down in this header.
// Because the condition below ends in `|| 1` it is always true, so the value 0 takes effect.
40 #if defined(NDEBUG) || COMPILER(MSVC) || 1
41 #define CHECK_STRINGVIEW_LIFETIME 0
43 #define CHECK_STRINGVIEW_LIFETIME 1
// Pointer to a character predicate: takes one UChar and returns bool. It is the matcher
// argument type of StringView::find(CharacterMatchFunction, unsigned).
48 using CharacterMatchFunction = bool (*)(UChar);
50 // StringView is a non-owning reference to a string, similar to the proposed std::string_view.
// The state of a view is the three fields declared near the end of this section: a character
// pointer (m_characters), a length (m_length) and an 8-bit/16-bit flag (m_is8Bit).

// Special members that are only declared when lifetime checking is compiled in.
55 #if CHECK_STRINGVIEW_LIFETIME
57 StringView(StringView&&);
58 StringView(const StringView&);
59 StringView& operator=(StringView&&);
60 StringView& operator=(const StringView&);
// Construction from the WTF string types and from raw character buffers.
// The const char* overload takes no length; its inline definition obtains one with strlen().
63 StringView(const AtomicString&);
64 StringView(const String&);
65 StringView(const StringImpl&);
66 StringView(const StringImpl*);
67 StringView(const LChar*, unsigned length);
68 StringView(const UChar*, unsigned length);
69 StringView(const char*);
// Factory for a zero-length view; the inline definition builds it from an empty string literal.
71 static StringView empty();
// Size, truthiness and indexed access.
73 unsigned length() const;
76 explicit operator bool() const;
79 UChar operator[](unsigned index) const;
// Range objects whose begin()/end() return iterators over the view.
82 CodeUnits codeUnits() const;
85 CodePoints codePoints() const;
87 class GraphemeClusters;
88 GraphemeClusters graphemeClusters() const;
// Raw character access: each returns m_characters cast to the requested character type.
91 const LChar* characters8() const;
92 const UChar* characters16() const;
// Conversions to WTF string types.
94 String toString() const;
95 String toStringWithoutCopying() const;
96 AtomicString toAtomicString() const;
99 // This function converts null strings to empty strings.
100 WTF_EXPORT_STRING_API RetainPtr<CFStringRef> createCFStringWithoutCopying() const;
104 // These functions convert null strings to empty strings.
105 WTF_EXPORT_STRING_API RetainPtr<NSString> createNSString() const;
106 WTF_EXPORT_STRING_API RetainPtr<NSString> createNSStringWithoutCopying() const;
// Exported out-of-line; the conversion mode defaults to LenientConversion.
109 WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
// Helper giving UChar access to the characters regardless of the view's width (defined below).
111 class UpconvertedCharacters;
112 UpconvertedCharacters upconvertedCharacters() const;
// Copy the view's characters into a caller-supplied destination buffer.
114 void getCharactersWithUpconvert(LChar*) const;
115 void getCharactersWithUpconvert(UChar*) const;
// The default length is the largest unsigned value.
117 StringView substring(unsigned start, unsigned length = std::numeric_limits<unsigned>::max()) const;
// Searching. The UChar and predicate overloads are defined inline below; the rest are exported.
119 size_t find(UChar, unsigned start = 0) const;
120 size_t find(CharacterMatchFunction, unsigned start = 0) const;
122 WTF_EXPORT_STRING_API size_t find(StringView, unsigned start) const;
124 WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringView&) const;
125 WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringView&, unsigned startOffset) const;
// contains(UChar) is defined inline below as find(character) != notFound.
127 bool contains(UChar) const;
128 WTF_EXPORT_STRING_API bool containsIgnoringASCIICase(const StringView&) const;
129 WTF_EXPORT_STRING_API bool containsIgnoringASCIICase(const StringView&, unsigned startOffset) const;
131 WTF_EXPORT_STRING_API bool startsWith(const StringView&) const;
132 WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringView&) const;
134 WTF_EXPORT_STRING_API bool endsWith(const StringView&) const;
135 WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringView&) const;
// Numeric parsing; success is reported through the bool& out-parameter.
138 int toInt(bool& isValid) const;
139 int toIntStrict(bool& isValid) const;
140 float toFloat(bool& isValid) const;
// Lifetime-checking hook; the inline definition used when checking is disabled is declared below.
142 static void invalidate(const StringImpl&);
144 struct UnderlyingString;
// equal() compares m_characters of both views directly, hence the friendship.
147 friend bool equal(StringView, StringView);
// Shared setup used by the constructors.
149 void initialize(const LChar*, unsigned length);
150 void initialize(const UChar*, unsigned length);
// Two alternative sets of lifetime hooks: exported out-of-line functions, followed by inline
// versions that do nothing (underlyingStringIsValid() always returns true).
152 #if CHECK_STRINGVIEW_LIFETIME
153 WTF_EXPORT_STRING_API bool underlyingStringIsValid() const;
154 WTF_EXPORT_STRING_API void setUnderlyingString(const StringImpl*);
155 WTF_EXPORT_STRING_API void setUnderlyingString(const StringView&);
157 bool underlyingStringIsValid() const { return true; }
158 void setUnderlyingString(const StringImpl*) { }
159 void setUnderlyingString(const StringView&) { }
// View state. Defaults: null pointer, zero length, flagged as 8-bit.
163 const void* m_characters { nullptr };
164 unsigned m_length { 0 };
165 bool m_is8Bit { true };
// Extra bookkeeping that exists only when lifetime checking is compiled in.
167 #if CHECK_STRINGVIEW_LIFETIME
168 void adoptUnderlyingString(UnderlyingString*);
169 UnderlyingString* m_underlyingString { nullptr };
// Declarations of the free functions defined later in this header.
// append(): adds the characters of a StringView to the end of a Vector of characters.
173 template<typename CharacterType, size_t inlineCapacity> void append(Vector<CharacterType, inlineCapacity>&, StringView);
// equal(): comparison against another view or a C string of LChar/char.
175 bool equal(StringView, StringView);
176 bool equal(StringView, const LChar*);
177 bool equal(StringView, const char*);
// ASCII-case-insensitive counterparts.
179 bool equalIgnoringASCIICase(StringView, StringView);
180 bool equalIgnoringASCIICase(StringView, const char*);
// Takes a character-array reference so the array length is a template argument.
182 template<unsigned length> bool equalLettersIgnoringASCIICase(StringView, const char (&lowercaseLetters)[length]);
// Equality operators. Each forwards to the matching equal() overload; the overloads that take
// the C string on the left swap the operands so the StringView is always the first argument.
184 inline bool operator==(StringView a, StringView b) { return equal(a, b); }
185 inline bool operator==(StringView a, const LChar* b) { return equal(a, b); }
186 inline bool operator==(StringView a, const char* b) { return equal(a, b); }
187 inline bool operator==(const LChar* a, StringView b) { return equal(b, a); }
188 inline bool operator==(const char* a, StringView b) { return equal(b, a); }
// Inequality operators. Each returns the negation of the matching equal() overload, again
// passing the StringView as the first argument when the C string is the left operand.
190 inline bool operator!=(StringView a, StringView b) { return !equal(a, b); }
191 inline bool operator!=(StringView a, const LChar* b) { return !equal(a, b); }
192 inline bool operator!=(StringView a, const char* b) { return !equal(a, b); }
193 inline bool operator!=(const LChar* a, StringView b) { return !equal(b, a); }
194 inline bool operator!=(const char* a, StringView b) { return !equal(b, a); }
198 #include <wtf/text/AtomicString.h>
199 #include <wtf/text/WTFString.h>
// Default constructor.
// NOTE(review): no statements are visible for this constructor in this listing, so the members
// presumably keep their in-class defaults (null pointer, zero length, 8-bit) - confirm.
203 inline StringView::StringView()
205 // FIXME: It's peculiar that null strings are 16-bit and empty strings return 8-bit (according to the is8Bit function).
// Start of the special members that exist only when lifetime checking is compiled in.
208 #if CHECK_STRINGVIEW_LIFETIME
// Destructor: clears the tracked underlying string by passing nullptr to setUnderlyingString().
209 inline StringView::~StringView()
211 setUnderlyingString(nullptr);
// Move constructor (lifetime-checking builds only).
// Copies the pointer, length and width flag from `other`, then takes over its tracked
// underlying string and clears the tracking on `other`.
// NOTE(review): lines between the assertion and the tracking calls are missing from this listing.
214 inline StringView::StringView(StringView&& other)
215 : m_characters(other.m_characters)
216 , m_length(other.m_length)
217 , m_is8Bit(other.m_is8Bit)
// The source view must still refer to a valid string.
219 ASSERT(other.underlyingStringIsValid());
223 setUnderlyingString(other);
224 other.setUnderlyingString(nullptr);
// Copy constructor (lifetime-checking builds only).
// Copies the pointer, length and width flag, asserts that `other` is still valid and
// registers this view against the same underlying string as `other`.
227 inline StringView::StringView(const StringView& other)
228 : m_characters(other.m_characters)
229 , m_length(other.m_length)
230 , m_is8Bit(other.m_is8Bit)
232 ASSERT(other.underlyingStringIsValid());
234 setUnderlyingString(other);
// Move assignment (lifetime-checking builds only).
// Copies the three state fields from `other`, adopts its tracked underlying string and then
// clears the tracking on `other`.
// NOTE(review): lines after the field copies and the return statement are missing from this listing.
237 inline StringView& StringView::operator=(StringView&& other)
239 ASSERT(other.underlyingStringIsValid());
241 m_characters = other.m_characters;
242 m_length = other.m_length;
243 m_is8Bit = other.m_is8Bit;
247 setUnderlyingString(other);
248 other.setUnderlyingString(nullptr);
// Copy assignment (lifetime-checking builds only).
// Asserts that `other` is valid, copies the three state fields and registers this view
// against the same underlying string. NOTE(review): the return statement is not visible here.
253 inline StringView& StringView::operator=(const StringView& other)
255 ASSERT(other.underlyingStringIsValid());
257 m_characters = other.m_characters;
258 m_length = other.m_length;
259 m_is8Bit = other.m_is8Bit;
261 setUnderlyingString(other);
// End of the lifetime-checking-only special members.
265 #endif // CHECK_STRINGVIEW_LIFETIME
// initialize(): points the view at a caller-supplied character buffer (8-bit overload).
// NOTE(review): only the m_characters assignment is visible in this listing; m_length and
// m_is8Bit are presumably set here too - confirm.
267 inline void StringView::initialize(const LChar* characters, unsigned length)
269 m_characters = characters;
// 16-bit overload; the same caveat applies.
274 inline void StringView::initialize(const UChar* characters, unsigned length)
276 m_characters = characters;
// Constructs a view over `length` 8-bit characters by delegating to initialize().
281 inline StringView::StringView(const LChar* characters, unsigned length)
283 initialize(characters, length);
// Constructs a view over `length` 16-bit characters by delegating to initialize().
286 inline StringView::StringView(const UChar* characters, unsigned length)
288 initialize(characters, length);
// Constructs a view over a C string, treated as LChar data whose length comes from strlen().
// `characters` is passed straight to strlen(); no null check is visible here.
291 inline StringView::StringView(const char* characters)
293 initialize(reinterpret_cast<const LChar*>(characters), strlen(characters));
// Constructs a view over a StringImpl: records it as the underlying string, then initializes
// from either its 8-bit or its 16-bit characters.
// NOTE(review): the condition selecting between the two initialize() calls is missing from this
// listing; presumably string.is8Bit(), as in the pointer overload - confirm.
296 inline StringView::StringView(const StringImpl& string)
298 setUnderlyingString(&string);
300 initialize(string.characters8(), string.length());
302 initialize(string.characters16(), string.length());
// Constructs a view over the StringImpl that `string` points to.
// NOTE(review): `string` is dereferenced below, and lines between the signature and the
// setUnderlyingString() call are missing from this listing - presumably a null check; confirm.
305 inline StringView::StringView(const StringImpl* string)
310 setUnderlyingString(string);
// Pick the initialize() overload matching the string's character width.
311 if (string->is8Bit())
312 initialize(string->characters8(), string->length());
314 initialize(string->characters16(), string->length());
// Constructs a view over a String, recording its impl() as the underlying string.
317 inline StringView::StringView(const String& string)
319 setUnderlyingString(string.impl());
// Null String. NOTE(review): the statements inside this branch are missing from this listing.
320 if (!string.impl()) {
// 8-bit strings are initialized from characters8(), otherwise from characters16().
324 if (string.is8Bit()) {
325 initialize(string.characters8(), string.length());
328 initialize(string.characters16(), string.length());
// Constructs a view over an AtomicString by delegating to the String constructor with
// atomicString.string().
331 inline StringView::StringView(const AtomicString& atomicString)
332 : StringView(atomicString.string())
// Resets the view: the character pointer is set to null, which is what isNull() tests.
// NOTE(review): further statements of this function are missing from this listing.
336 inline void StringView::clear()
338 m_characters = nullptr;
// Returns a zero-length view whose character pointer refers to an empty string literal
// (cast to LChar), so the pointer is non-null.
343 inline StringView StringView::empty()
345 return StringView(reinterpret_cast<const LChar*>(""), 0);
// Returns the character pointer typed as LChar, after asserting that the underlying string is
// still valid. NOTE(review): presumably only meaningful for 8-bit views - confirm.
348 inline const LChar* StringView::characters8() const
351 ASSERT(underlyingStringIsValid());
352 return static_cast<const LChar*>(m_characters);
// Returns the character pointer typed as UChar, after asserting that the underlying string is
// still valid. NOTE(review): presumably only meaningful for 16-bit views - confirm.
355 inline const UChar* StringView::characters16() const
358 ASSERT(underlyingStringIsValid());
359 return static_cast<const UChar*>(m_characters);
// Gives const UChar* access to a StringView's characters. The constructor (defined below)
// either points m_characters at the view's own 16-bit data or fills m_upconvertedCharacters
// from the 8-bit data and points at that.
362 class StringView::UpconvertedCharacters {
364 explicit UpconvertedCharacters(const StringView&);
// Both accessors return the same pointer; the conversion operator allows implicit use.
365 operator const UChar*() const { return m_characters; }
366 const UChar* get() const { return m_characters; }
// Storage for the widened copy, with an inline capacity of 32 elements.
368 Vector<UChar, 32> m_upconvertedCharacters;
369 const UChar* m_characters;
// Returns an UpconvertedCharacters helper constructed from this view.
372 inline StringView::UpconvertedCharacters StringView::upconvertedCharacters() const
374 return UpconvertedCharacters(*this);
// A view is null when its character pointer is null.
377 inline bool StringView::isNull() const
379 return !m_characters;
// Simple state accessors. NOTE(review): only the signatures of these four functions are present
// in this listing; their bodies are missing, so the behavior should be confirmed against the
// full header (presumably thin reads of m_length / m_characters / m_is8Bit).
382 inline bool StringView::isEmpty() const
387 inline unsigned StringView::length() const
392 inline StringView::operator bool() const
397 inline bool StringView::is8Bit() const
// Builds a sub-view that begins `start` code units into this view.
402 inline StringView StringView::substring(unsigned start, unsigned length) const
// Start at or past the end. NOTE(review): the value returned for this case is missing from this listing.
404 if (start >= this->length())
// Number of code units available from `start` to the end of the view.
406 unsigned maxLength = this->length() - start;
// The request reaches or passes the end. NOTE(review): the handling inside this branch is missing from this listing.
408 if (length >= maxLength) {
// 8-bit and 16-bit variants: the result is offset into the same buffer and records *this as
// its underlying string for lifetime checking.
415 StringView result(characters8() + start, length);
416 result.setUnderlyingString(*this);
419 StringView result(characters16() + start, length);
420 result.setUnderlyingString(*this);
// Returns the code unit at `index`; the index is only checked by an assertion.
// NOTE(review): the condition choosing between the two returns is missing from this listing.
424 inline UChar StringView::operator[](unsigned index) const
426 ASSERT(index < length());
428 return characters8()[index];
429 return characters16()[index];
// True when find(character) reports a position other than notFound.
432 inline bool StringView::contains(UChar character) const
434 return find(character) != notFound;
// Copies m_length characters of the view into `destination` (8-bit destination).
// The caller must supply a buffer with room for at least m_length elements; nothing here checks it.
// NOTE(review): a line before the copy loop is missing from this listing.
437 inline void StringView::getCharactersWithUpconvert(LChar* destination) const
440 auto characters8 = this->characters8();
441 for (unsigned i = 0; i < m_length; ++i)
442 destination[i] = characters8[i];
// 16-bit destination: contains one copy loop reading 8-bit source characters (each LChar is
// widened on assignment) and one reading 16-bit source characters.
// NOTE(review): the condition selecting between the two loops is missing from this listing.
445 inline void StringView::getCharactersWithUpconvert(UChar* destination) const
448 auto characters8 = this->characters8();
449 for (unsigned i = 0; i < m_length; ++i)
450 destination[i] = characters8[i];
453 auto characters16 = this->characters16();
454 for (unsigned i = 0; i < m_length; ++i)
455 destination[i] = characters16[i];
// Prepares UChar access to `string`.
458 inline StringView::UpconvertedCharacters::UpconvertedCharacters(const StringView& string)
// 16-bit views need no conversion: point directly at the view's characters.
// NOTE(review): the end of this branch (presumably an early return) is missing from this listing.
460 if (!string.is8Bit()) {
461 m_characters = string.characters16();
// 8-bit views: reserve space once, append each character widened to UChar, then expose the
// vector's storage. uncheckedAppend relies on the capacity reserved just above.
464 const LChar* characters8 = string.characters8();
465 unsigned length = string.m_length;
466 m_upconvertedCharacters.reserveInitialCapacity(length);
467 for (unsigned i = 0; i < length; ++i)
468 m_upconvertedCharacters.uncheckedAppend(characters8[i]);
469 m_characters = m_upconvertedCharacters.data();
// Creates a String from the view's characters and length, using the 8-bit or 16-bit pointer.
// NOTE(review): the condition choosing between the two returns is missing from this listing.
472 inline String StringView::toString() const
475 return String(characters8(), m_length);
476 return String(characters16(), m_length);
// Creates an AtomicString from the view's characters and length (8-bit or 16-bit pointer).
// NOTE(review): the condition choosing between the two returns is missing from this listing.
479 inline AtomicString StringView::toAtomicString() const
482 return AtomicString(characters8(), m_length);
483 return AtomicString(characters16(), m_length);
// Parses the view as a float by forwarding to charactersToFloat(); `isValid` is passed by
// address so the helper can report success.
// NOTE(review): the condition choosing between the two returns is missing from this listing.
486 inline float StringView::toFloat(bool& isValid) const
489 return charactersToFloat(characters8(), m_length, &isValid);
490 return charactersToFloat(characters16(), m_length, &isValid);
// Convenience overload: forwards to toInt(bool&) and discards the validity flag.
// NOTE(review): the declaration of the local `isValid` is missing from this listing.
493 inline int StringView::toInt() const
496 return toInt(isValid);
// Parses the view as an int by forwarding to charactersToInt(); `isValid` is passed by address
// so the helper can report success.
// NOTE(review): the condition choosing between the two returns is missing from this listing.
499 inline int StringView::toInt(bool& isValid) const
502 return charactersToInt(characters8(), m_length, &isValid);
503 return charactersToInt(characters16(), m_length, &isValid);
// Same shape as toInt(bool&), but forwards to charactersToIntStrict().
// NOTE(review): the condition choosing between the two returns is missing from this listing.
506 inline int StringView::toIntStrict(bool& isValid) const
509 return charactersToIntStrict(characters8(), m_length, &isValid);
510 return charactersToIntStrict(characters16(), m_length, &isValid);
// Creates a String via StringImpl::createWithoutCopying() over the view's own characters.
// NOTE(review): as the name says the data is not copied, so the result presumably must not
// outlive the viewed buffer - confirm. The branch condition is missing from this listing.
513 inline String StringView::toStringWithoutCopying() const
516 return StringImpl::createWithoutCopying(characters8(), m_length);
517 return StringImpl::createWithoutCopying(characters16(), m_length);
// Searches for `character` from offset `start` by forwarding to WTF::find() with the 8-bit or
// 16-bit character pointer and the view's length.
// NOTE(review): the condition choosing between the two returns is missing from this listing.
520 inline size_t StringView::find(UChar character, unsigned start) const
523 return WTF::find(characters8(), m_length, character, start);
524 return WTF::find(characters16(), m_length, character, start);
// Same forwarding, with a predicate (CharacterMatchFunction) instead of a single character.
527 inline size_t StringView::find(CharacterMatchFunction matchFunction, unsigned start) const
530 return WTF::find(characters8(), m_length, matchFunction, start);
531 return WTF::find(characters16(), m_length, matchFunction, start);
// Inline definition of invalidate() used when lifetime checking is compiled out; the
// parameter is unnamed, i.e. unused.
// NOTE(review): the body is missing from this listing - presumably empty; confirm.
534 #if !CHECK_STRINGVIEW_LIFETIME
535 inline void StringView::invalidate(const StringImpl&)
// StringTypeAdapter specialization for StringView: reports length and width from the wrapped
// view and writes its characters out through getCharactersWithUpconvert().
540 template<typename StringType> class StringTypeAdapter;
542 template<> class StringTypeAdapter<StringView> {
// NOTE(review): the constructor's initializer and the m_string member declaration are missing from this listing.
544 StringTypeAdapter<StringView>(StringView string)
549 unsigned length() { return m_string.length(); }
550 bool is8Bit() { return m_string.is8Bit(); }
// The destination must have room for length() characters of the corresponding type.
551 void writeTo(LChar* destination) { m_string.getCharactersWithUpconvert(destination); }
552 void writeTo(UChar* destination) { m_string.getCharactersWithUpconvert(destination); }
554 String toString() const { return m_string.toString(); }
// Appends the characters of `string` to `buffer`: the vector is grown by string.length()
// elements and the characters are written starting at the old end.
560 template<typename CharacterType, size_t inlineCapacity> void append(Vector<CharacterType, inlineCapacity>& buffer, StringView string)
// Remember where the existing contents end before growing.
562 unsigned oldSize = buffer.size();
563 buffer.grow(oldSize + string.length());
564 string.getCharactersWithUpconvert(buffer.data() + oldSize);
// Compares two views.
567 inline bool equal(StringView a, StringView b)
// Fast path: both views start at the same address, so they are equal exactly when their
// lengths match. The assertion documents that such views are expected to share a width.
569 if (a.m_characters == b.m_characters) {
570 ASSERT(a.is8Bit() == b.is8Bit());
571 return a.length() == b.length();
// Otherwise defer to the shared comparison helper.
574 return equalCommon(a, b);
// Compares a view against a C string of LChar.
// NOTE(review): several lines between the signature and the length computation are missing
// from this listing (presumably null/empty handling), as is the branch condition - confirm.
577 inline bool equal(StringView a, const LChar* b)
583 unsigned aLength = a.length();
// Forward to the character-pointer equal() overloads, passing the view's length.
585 return equal(a.characters8(), b, aLength);
586 return equal(a.characters16(), b, aLength);
// Compares a view against a char C string by reinterpreting it as LChar and forwarding to
// the LChar overload.
589 inline bool equal(StringView a, const char* b)
591 return equal(a, reinterpret_cast<const LChar*>(b));
// ASCII-case-insensitive comparison of two views; forwards to equalIgnoringASCIICaseCommon().
594 inline bool equalIgnoringASCIICase(StringView a, StringView b)
596 return equalIgnoringASCIICaseCommon(a, b);
// Same, against a C string; also forwards to equalIgnoringASCIICaseCommon().
599 inline bool equalIgnoringASCIICase(StringView a, const char* b)
601 return equalIgnoringASCIICaseCommon(a, b);
// Range object returned by StringView::graphemeClusters(). It stores its own StringView
// (by value) and hands out Iterators positioned at index 0 and at length().
604 class StringView::GraphemeClusters {
606 explicit GraphemeClusters(const StringView&);
609 Iterator begin() const;
610 Iterator end() const;
613 StringView m_stringView;
// Range object returned by StringView::codePoints(). It stores its own StringView (by value)
// and hands out Iterators positioned at index 0 and at length().
616 class StringView::CodePoints {
618 explicit CodePoints(const StringView&);
621 Iterator begin() const;
622 Iterator end() const;
625 StringView m_stringView;
// Range object returned by StringView::codeUnits(). It stores its own StringView (by value)
// and hands out Iterators positioned at index 0 and at length().
628 class StringView::CodeUnits {
630 explicit CodeUnits(const StringView&);
633 Iterator begin() const;
634 Iterator end() const;
637 StringView m_stringView;
// Iterator over grapheme clusters. All of its operations are exported and defined out of
// line; the state lives behind m_impl.
640 class StringView::GraphemeClusters::Iterator {
// No default construction; an iterator is always created for a view and a starting index.
642 WTF_EXPORT_PRIVATE Iterator() = delete;
643 WTF_EXPORT_PRIVATE Iterator(const StringView&, unsigned index);
644 WTF_EXPORT_PRIVATE ~Iterator();
// Move-constructible only: copying and both forms of assignment are deleted.
646 Iterator(const Iterator&) = delete;
647 WTF_EXPORT_PRIVATE Iterator(Iterator&&);
648 Iterator& operator=(const Iterator&) = delete;
649 Iterator& operator=(Iterator&&) = delete;
// Dereferencing yields a StringView rather than a single character.
651 WTF_EXPORT_PRIVATE StringView operator*() const;
652 WTF_EXPORT_PRIVATE Iterator& operator++();
654 WTF_EXPORT_PRIVATE bool operator==(const Iterator&) const;
655 WTF_EXPORT_PRIVATE bool operator!=(const Iterator&) const;
// NOTE(review): the declaration of Impl is missing from this listing.
660 std::unique_ptr<Impl> m_impl;
// Iterator over code points (UChar32). It refers to the iterated StringView without owning it.
663 class StringView::CodePoints::Iterator {
665 Iterator(const StringView&, unsigned index);
667 UChar32 operator*() const;
668 Iterator& operator++();
670 bool operator==(const Iterator&) const;
671 bool operator!=(const Iterator&) const;
672 Iterator& operator=(const Iterator&);
// Held through std::reference_wrapper (rather than a plain reference) so operator= can rebind it.
675 std::reference_wrapper<const StringView> m_stringView;
// Offset of the next code point to read; operator++ sets it to Nullopt at the end of the view.
// NOTE(review): the m_codePoint member used by the definitions below is missing from this listing.
676 Optional<unsigned> m_nextCodePointOffset;
// Iterator over code units (UChar). It keeps a reference to the iterated StringView, so that
// view must outlive the iterator.
680 class StringView::CodeUnits::Iterator {
682 Iterator(const StringView&, unsigned index);
684 UChar operator*() const;
685 Iterator& operator++();
687 bool operator==(const Iterator&) const;
688 bool operator!=(const Iterator&) const;
// NOTE(review): the m_index member used by the definitions below is missing from this listing.
691 const StringView& m_stringView;
// Range accessors: each returns the corresponding range object constructed from this view.
695 inline auto StringView::graphemeClusters() const -> GraphemeClusters
697 return GraphemeClusters(*this);
700 inline auto StringView::codePoints() const -> CodePoints
702 return CodePoints(*this);
705 inline auto StringView::codeUnits() const -> CodeUnits
707 return CodeUnits(*this);
// Stores a copy of the given view in m_stringView.
710 inline StringView::GraphemeClusters::GraphemeClusters(const StringView& stringView)
711 : m_stringView(stringView)
// begin(): iterator positioned at index 0 of the stored view.
715 inline auto StringView::GraphemeClusters::begin() const -> Iterator
717 return Iterator(m_stringView, 0);
// end(): iterator positioned at the stored view's length.
720 inline auto StringView::GraphemeClusters::end() const -> Iterator
722 return Iterator(m_stringView, m_stringView.length());
// Stores a copy of the given view in m_stringView.
725 inline StringView::CodePoints::CodePoints(const StringView& stringView)
726 : m_stringView(stringView)
// Binds the iterator to `stringView` and records `index` as the offset of the next code point.
// NOTE(review): the constructor body is missing from this listing; it presumably reads the
// first code point (operator* returns a cached value) - confirm.
730 inline StringView::CodePoints::Iterator::Iterator(const StringView& stringView, unsigned index)
731 : m_stringView(stringView)
732 , m_nextCodePointOffset(index)
// Advances to the next code point and stores it in m_codePoint.
737 inline auto StringView::CodePoints::Iterator::operator++() -> Iterator&
// Must not be called on an iterator whose offset is already Nullopt.
739 ASSERT(m_nextCodePointOffset);
// Nothing left to read: mark the iterator by resetting the offset to Nullopt.
// NOTE(review): the rest of this branch (presumably an early return) is missing from this listing.
740 if (m_nextCodePointOffset.value() == m_stringView.get().length()) {
741 m_nextCodePointOffset = Nullopt;
// 8-bit views: one code unit is one code point; read it and step the offset by one.
744 if (m_stringView.get().is8Bit())
745 m_codePoint = m_stringView.get().characters8()[m_nextCodePointOffset.value()++];
// 16-bit views: ICU's U16_NEXT reads the next code point into m_codePoint and advances the
// offset, bounded by the view's length.
747 U16_NEXT(m_stringView.get().characters16(), m_nextCodePointOffset.value(), m_stringView.get().length(), m_codePoint);
748 ASSERT(m_nextCodePointOffset.value() <= m_stringView.get().length());
// Copy assignment: rebinds the view reference and copies the offset and the cached code point.
// NOTE(review): the return statement is missing from this listing.
752 inline auto StringView::CodePoints::Iterator::operator=(const Iterator& other) -> Iterator&
754 m_stringView = other.m_stringView;
755 m_nextCodePointOffset = other.m_nextCodePointOffset;
756 m_codePoint = other.m_codePoint;
// Dereference; asserts that the iterator still has an offset (i.e. is not past the end).
// NOTE(review): the return statement is missing from this listing - presumably m_codePoint; confirm.
760 inline UChar32 StringView::CodePoints::Iterator::operator*() const
762 ASSERT(m_nextCodePointOffset);
// Two iterators are equal when their next-code-point offsets are equal. Comparing iterators
// that refer to different StringView objects is a usage error (checked by assertion only).
766 inline bool StringView::CodePoints::Iterator::operator==(const Iterator& other) const
768 ASSERT(&m_stringView.get() == &other.m_stringView.get());
769 return m_nextCodePointOffset == other.m_nextCodePointOffset;
// Negation of operator==.
772 inline bool StringView::CodePoints::Iterator::operator!=(const Iterator& other) const
774 return !(*this == other);
// begin(): iterator created at index 0 of the stored view.
777 inline auto StringView::CodePoints::begin() const -> Iterator
779 return Iterator(m_stringView, 0);
// end(): iterator created at the stored view's length.
782 inline auto StringView::CodePoints::end() const -> Iterator
784 return Iterator(m_stringView, m_stringView.length());
// Stores a copy of the given view in m_stringView.
787 inline StringView::CodeUnits::CodeUnits(const StringView& stringView)
788 : m_stringView(stringView)
// Binds the iterator to `stringView` (kept by reference).
// NOTE(review): the initialization of the index member from `index` is missing from this listing.
792 inline StringView::CodeUnits::Iterator::Iterator(const StringView& stringView, unsigned index)
793 : m_stringView(stringView)
// Pre-increment. NOTE(review): the body is missing from this listing - presumably it advances
// m_index and returns *this; confirm.
798 inline auto StringView::CodeUnits::Iterator::operator++() -> Iterator&
// Returns the code unit at the current index via StringView::operator[].
804 inline UChar StringView::CodeUnits::Iterator::operator*() const
806 return m_stringView[m_index];
// Two iterators are equal when their indices are equal. Comparing iterators that refer to
// different StringView objects is a usage error (checked by assertion only).
809 inline bool StringView::CodeUnits::Iterator::operator==(const Iterator& other) const
811 ASSERT(&m_stringView == &other.m_stringView);
812 return m_index == other.m_index;
// Negation of operator==.
815 inline bool StringView::CodeUnits::Iterator::operator!=(const Iterator& other) const
817 return !(*this == other);
// begin(): iterator at index 0 of the stored view.
820 inline auto StringView::CodeUnits::begin() const -> Iterator
822 return Iterator(m_stringView, 0);
// end(): iterator at the stored view's length.
825 inline auto StringView::CodeUnits::end() const -> Iterator
827 return Iterator(m_stringView, m_stringView.length());
// Compares `string` against a character-array literal by forwarding to
// equalLettersIgnoringASCIICaseCommon(); the array length is deduced as a template argument.
// As the parameter name indicates, the literal is expected to be lowercase.
830 template<unsigned length> inline bool equalLettersIgnoringASCIICase(StringView string, const char (&lowercaseLetters)[length])
832 return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
// Makes WTF::StringView usable without qualification by code that includes this header.
839 using WTF::StringView;
841 #endif // StringView_h