2 * Copyright (C) 2004, 2006, 2009, 2014 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 // FIXME: Move each iterator class into a separate header file.
30 #include "FindOptions.h"
32 #include "TextIteratorBehavior.h"
33 #include <wtf/Vector.h>
34 #include <wtf/text/StringView.h>
40 class RenderTextFragment;
41 namespace SimpleLineLayout {
// Free functions that take a Range and produce either a String or another Range.
// Both plainText variants accept an optional TextIteratorBehavior (TextIteratorDefaultBehavior
// when omitted) and an isDisplayString flag (false when omitted), and return a String.
// NOTE(review): going by its name, plainTextReplacingNoBreakSpace presumably substitutes
// no-break spaces in the result; confirm against the implementation.
45 WEBCORE_EXPORT String plainText(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
46 WEBCORE_EXPORT String plainTextReplacingNoBreakSpace(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
// Takes the Range to search (by reference, unlike the pointer taken above), the target String,
// and FindOptions; returns a Ref<Range>.
// NOTE(review): what the returned range denotes when nothing matches is not visible here.
47 Ref<Range> findPlainText(const Range&, const String&, FindOptions);
49 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
// Predicate over a RenderObject pointer.
// NOTE(review): whether a null pointer is accepted cannot be told from this declaration.
50 bool isRendererReplacedElement(RenderObject*);
// Reports a size as an unsigned count.
// NOTE(review): the enclosing class header is not visible in this excerpt, so the unit of
// the count (e.g. bits vs. words) cannot be confirmed from here.
61 unsigned size() const;
// Storage member: a Vector of unsigned values.
// NOTE(review): the second template argument (1) is presumably an inline capacity; confirm against wtf/Vector.h.
65 Vector<unsigned, 1> m_words;
// Holds text in one of two forms, as seen in text() below: a single UChar, or a slice
// (m_offset, m_length) of m_string.
68 class TextIteratorCopyableText {
// Default-constructs with m_singleCharacter set to 0, which text() treats as "no single character stored".
70 TextIteratorCopyableText()
71 : m_singleCharacter(0)
// Returns a view of the held text: a one-character view onto m_singleCharacter when it is non-zero,
// otherwise the [m_offset, m_offset + m_length) slice of m_string.
// Because 0 is the "unset" marker, a stored character with value 0 is indistinguishable from "no character".
77 StringView text() const { return m_singleCharacter ? StringView(&m_singleCharacter, 1) : StringView(m_string).substring(m_offset, m_length); }
// Appends to the given StringBuilder; const, so the held text is not modified.
78 void appendToStringBuilder(StringBuilder&) const;
// Takes a String by rvalue reference together with an offset and a length.
// NOTE(review): presumably stores them as the slice returned by text(); confirm in the implementation.
82 void set(String&&, unsigned offset, unsigned length);
// Single-character form of the text; 0 means "not in use" (see text()).
86 UChar m_singleCharacter;
92 // Iterates through the DOM range, returning all the text, and 0-length boundaries
93 // at points where replaced elements break up the text flow. The text is delivered in
94 // the chunks it's already stored in, to avoid copying any text.
// Constructs an iterator over the given Range with the given behavior flags
// (TextIteratorDefaultBehavior when omitted). Explicit, so a Range pointer never converts implicitly.
98 WEBCORE_EXPORT explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
99 WEBCORE_EXPORT ~TextIterator();
// The iterator is exhausted once m_positionNode is null.
101 bool atEnd() const { return !m_positionNode; }
102 WEBCORE_EXPORT void advance();
// Returns m_text for the current chunk; asserts that the iterator is not at its end.
104 StringView text() const { ASSERT(!atEnd()); return m_text; }
// NOTE(review): range() and node() presumably describe the current chunk's position; confirm in the implementation.
105 WEBCORE_EXPORT Ref<Range> range() const;
106 WEBCORE_EXPORT Node* node() const;
// Exposes m_copyableText by const reference; asserts that the iterator is not at its end.
108 const TextIteratorCopyableText& copyableText() const { ASSERT(!atEnd()); return m_copyableText; }
// Appends the current chunk to builder by delegating to copyableText(), so the same !atEnd() assertion applies.
109 void appendTextToStringBuilder(StringBuilder& builder) const { copyableText().appendToStringBuilder(builder); }
// Static helpers relating ranges to character locations and lengths.
// Note the mixed integer types: rangeLength/rangeFromLocationAndLength/subrange use int, while
// getLocationAndLengthFromRange writes its results through size_t references.
// NOTE(review): the effect of spacesForReplacedElements and the meaning of a null RefPtr / false
// return are not visible here; confirm in the implementation.
111 WEBCORE_EXPORT static int rangeLength(const Range*, bool spacesForReplacedElements = false);
112 WEBCORE_EXPORT static RefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
113 WEBCORE_EXPORT static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length);
114 WEBCORE_EXPORT static Ref<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
// Helper member functions (declarations only; their definitions are not part of this header excerpt).
// NOTE(review): the bool-returning should*/handle* functions presumably report a decision or
// whether the node was fully handled; the exact meaning of each result must be confirmed in the implementation.
117 void exitNode(Node*);
118 bool shouldRepresentNodeOffsetZero();
119 bool shouldEmitSpaceBeforeAndAfterNode(Node&);
120 void representNodeOffsetZero();
121 bool handleTextNode();
122 bool handleReplacedElement();
123 bool handleNonTextNode();
124 void handleTextBox();
125 void handleTextNodeFirstLetter(RenderTextFragment&);
// Both emit functions take a start/end text offset pair as ints; emitCharacter additionally takes
// the character itself, the node it belongs to (by reference) and a nullable offset base node.
126 void emitCharacter(UChar, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
127 void emitText(Text& textNode, RenderText&, int textStartOffset, int textEndOffset);
// Const accessor returning a Node pointer.
// NOTE(review): presumably the node a newline is attributed to when one is emitted; confirm.
129 Node* baseNodeForEmittingNewLine() const;
// Behavior flags for this iterator. Declared const, so the value cannot change after construction;
// the in-class default is TextIteratorDefaultBehavior.
131 const TextIteratorBehavior m_behavior { TextIteratorDefaultBehavior };
133 // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree.
134 Node* m_node { nullptr };
// NOTE(review): presumably per-node traversal flags (whether m_node and its children have been processed)
// and a stack tracking clipping state; confirm in the implementation.
136 bool m_handledNode { false };
137 bool m_handledChildren { false };
138 BitStack m_fullyClippedStack;
// NOTE(review): presumably the boundaries of the range being iterated, as container/offset pairs,
// plus the first node past the end; confirm in the implementation.
141 Node* m_startContainer { nullptr };
142 int m_startOffset { 0 };
143 Node* m_endContainer { nullptr };
144 int m_endOffset { 0 };
145 Node* m_pastEndNode { nullptr };
147 // The current text and its position, in the form to be returned from the iterator.
// A null m_positionNode is what atEnd() reports as "iteration finished".
148 Node* m_positionNode { nullptr };
// The next three members are mutable, so const member functions are allowed to update them.
149 mutable Node* m_positionOffsetBaseNode { nullptr };
150 mutable int m_positionStartOffset { 0 };
151 mutable int m_positionEndOffset { 0 };
152 TextIteratorCopyableText m_copyableText;
155 // Used when there is still some pending text from the current node; when both of these are null, we go back to normal iterating.
156 Node* m_nodeForAdditionalNewline { nullptr };
157 InlineTextBox* m_textBox { nullptr };
159 // Used when iterating over :first-letter text to save pointer to remaining text box.
160 InlineTextBox* m_remainingTextBox { nullptr };
162 // Used to point to RenderText object for :first-letter.
163 RenderText* m_firstLetterText { nullptr };
165 // Used to do the whitespace collapsing logic.
166 Text* m_lastTextNode { nullptr };
167 bool m_lastTextNodeEndedWithCollapsedSpace { false };
168 UChar m_lastCharacter { 0 };
170 // Used to do simple line layout run logic.
171 bool m_nextRunNeedsWhitespace { false };
172 unsigned m_accumulatedSimpleTextLengthInFlow { 0 };
173 Text* m_previousSimpleTextNodeInFlow { nullptr };
// Held through std::unique_ptr, so this iterator owns the resolver object when one is set.
174 std::unique_ptr<SimpleLineLayout::RunResolver> m_flowRunResolverCache;
176 // Used when text boxes are out of order (Hebrew/Arabic with embedded LTR text)
177 Vector<InlineTextBox*> m_sortedTextBoxes;
178 size_t m_sortedTextBoxesPosition { 0 };
180 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
181 bool m_hasEmitted { false };
183 // Used when deciding whether the text fragment created by :first-letter should be looked into.
184 bool m_handledFirstLetter { false };
187 // Iterates backwards through the DOM range, returning all the text, and 0-length boundaries
188 // at points where replaced elements break up the text flow. The text comes back in
189 // chunks so as to optimize for performance of the iteration.
190 class SimplifiedBackwardsTextIterator {
// Explicit constructor; takes the Range by const reference (TextIterator takes a pointer instead).
192 explicit SimplifiedBackwardsTextIterator(const Range&);
// The iterator is exhausted once m_positionNode is null.
194 bool atEnd() const { return !m_positionNode; }
// Returns m_text for the current chunk; asserts that the iterator is not at its end.
197 StringView text() const { ASSERT(!atEnd()); return m_text; }
198 WEBCORE_EXPORT Ref<Range> range() const;
// Returns m_node, the current tree-walk position; asserts that the iterator is not at its end.
199 Node* node() const { ASSERT(!atEnd()); return m_node; }
// Helper member functions (declarations only).
// NOTE(review): the meaning of each bool result must be confirmed in the implementation.
203 bool handleTextNode();
// Takes startOffset and offsetInNode by non-const reference, so the callee may modify both.
204 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
205 bool handleReplacedElement();
206 bool handleNonTextNode();
207 void emitCharacter(UChar, Node&, int startOffset, int endOffset);
208 bool advanceRespectingRange(Node*);
// Behavior flags; const with an in-class default of TextIteratorDefaultBehavior.
210 const TextIteratorBehavior m_behavior { TextIteratorDefaultBehavior };
212 // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree.
213 Node* m_node { nullptr };
215 bool m_handledNode { false };
216 bool m_handledChildren { false };
217 BitStack m_fullyClippedStack;
// NOTE(review): presumably the boundaries of the range being iterated, as container/offset pairs; confirm.
220 Node* m_startContainer { nullptr };
221 int m_startOffset { 0 };
222 Node* m_endContainer { nullptr };
223 int m_endOffset { 0 };
225 // The current text and its position, in the form to be returned from the iterator.
226 Node* m_positionNode { nullptr };
227 int m_positionStartOffset { 0 };
228 int m_positionEndOffset { 0 };
229 TextIteratorCopyableText m_copyableText;
232 // Used to do the whitespace logic.
233 Text* m_lastTextNode { nullptr };
234 UChar m_lastCharacter { 0 };
236 // Whether m_node has advanced beyond the iteration range (i.e. m_startContainer).
237 bool m_havePassedStartContainer { false };
239 // Whether the first-letter renderer should be handled in the next call to handleTextNode.
240 bool m_shouldHandleFirstLetter { false };
243 // Builds on the text iterator, adding a character position so we can walk one
244 // character at a time, or faster, as needed. Useful for searching.
245 class CharacterIterator {
// Explicit constructor; takes the Range by const reference and optional behavior flags
// (TextIteratorDefaultBehavior when omitted).
247 explicit CharacterIterator(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior);
// At the end exactly when the wrapped TextIterator is.
249 bool atEnd() const { return m_underlyingIterator.atEnd(); }
// Advances by a caller-chosen number of characters.
250 void advance(int numCharacters);
// The part of the underlying iterator's current chunk that starts at m_runOffset.
252 StringView text() const { return m_underlyingIterator.text().substring(m_runOffset); }
253 Ref<Range> range() const;
// Plain accessors for m_atBreak and m_offset.
255 bool atBreak() const { return m_atBreak; }
256 int characterOffset() const { return m_offset; }
// The wrapped iterator that supplies the text chunks.
259 TextIterator m_underlyingIterator;
// Wraps a SimplifiedBackwardsTextIterator and advances it by a caller-chosen number of characters.
266 class BackwardsCharacterIterator {
// Explicit constructor; takes the Range by const reference.
268 explicit BackwardsCharacterIterator(const Range&);
// At the end exactly when the wrapped SimplifiedBackwardsTextIterator is.
270 bool atEnd() const { return m_underlyingIterator.atEnd(); }
271 void advance(int numCharacters);
273 Ref<Range> range() const;
// The wrapped iterator that supplies the text chunks.
276 SimplifiedBackwardsTextIterator m_underlyingIterator;
283 // Similar to the TextIterator, except that the chunks of text returned are "well behaved", meaning
284 // they never split up a word. This is useful for spell checking and perhaps one day for searching as well.
285 class WordAwareIterator {
// Explicit constructor; takes the Range by const reference.
287 explicit WordAwareIterator(const Range&);
// At the end only when no look-ahead is outstanding (m_didLookAhead is false) and the wrapped
// TextIterator is itself at its end.
289 bool atEnd() const { return !m_didLookAhead && m_underlyingIterator.atEnd(); }
// Declared out of line, unlike the inline text() accessors of the other iterators in this header.
292 StringView text() const;
// The wrapped iterator that supplies the raw text chunks.
295 TextIterator m_underlyingIterator;
297 // Text from the previous chunk from the text iterator.
298 TextIteratorCopyableText m_previousText;
300 // Many chunks from text iterator concatenated.
301 Vector<UChar> m_buffer;
303 // Did we have to look ahead in the text iterator to confirm the current chunk?
307 } // namespace WebCore