2 * Copyright (C) 2004 Apple Computer, Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef KHTML_EDITING_VISIBLE_TEXT_H
27 #define KHTML_EDITING_VISIBLE_TEXT_H
29 #include "dom/dom2_range.h"
35 // FIXME: Can't really answer this question without knowing the white-space mode.
36 // FIXME: Move this along with the white-space position functions above
37 // somewhere in the editing directory. It doesn't belong here.
// Reports whether c is treated as collapsible whitespace. The decision is made by
// switching on the character's Unicode value.
38 inline bool isCollapsibleWhitespace(const QChar &c)
40 switch (c.unicode()) {
// Produces a QString from a DOM range; by its name, the range's plain-text content.
49 QString plainText(const DOM::Range &);
// Looks for a string within a DOM range and reports the result as a DOM range.
// forward selects the search direction and caseSensitive the comparison mode.
// NOTE(review): presumably the returned range covers the match; what is returned
// when nothing is found is not visible here -- confirm against the implementation.
50 DOM::Range findPlainText(const DOM::Range &, const QString &, bool forward, bool caseSensitive);
52 // Iterates through the DOM range, returning all the text, and 0-length boundaries
53 // at points where replaced elements break up the text flow. The text comes back in
54 // chunks so as to optimize for performance of the iteration.
// Mode selector accepted by the TextIterator constructor, where CONTENT is the default.
// NOTE(review): what RUNFINDER changes is not visible in this header -- confirm in the implementation.
56 enum IteratorKind { CONTENT = 0, RUNFINDER = 1 };
// Creates an iterator over the given range; kind defaults to CONTENT.
62 explicit TextIterator(const DOM::Range &, IteratorKind kind = CONTENT );
// True when there is no current position node (m_positionNode is null).
64 bool atEnd() const { return !m_positionNode; }
// Length of the current chunk of text (m_textLength).
67 long length() const { return m_textLength; }
// Pointer to the current chunk of text (m_textCharacters); pair it with length().
68 const QChar *characters() const { return m_textCharacters; }
// NOTE(review): presumably the DOM range covered by the current chunk -- confirm.
70 DOM::Range range() const;
// Static helpers relating a DOM range to a character location/length pair.
// They are declared with unqualified names: a member declared inside its own class
// must not repeat the class name as a qualifier, and conforming compilers reject
// the qualified spelling.
//
// rangeLength takes a range and returns a long.
// NOTE(review): presumably the number of characters iteration over r yields -- confirm.
72 static long rangeLength(const DOM::Range &r);
// setRangeFromLocationAndLength writes its result into resultRange (the only non-const parameter).
// NOTE(review): presumably resultRange becomes the part of range that starts
// rangeLocation characters in and spans rangeLength characters -- confirm.
73 static void setRangeFromLocationAndLength (const DOM::Range &range, DOM::Range &resultRange, long rangeLocation, long rangeLength);
// Per-node-type handlers, one each for text nodes, replaced elements and all other nodes.
// NOTE(review): the meaning of the bool result is not visible in this header;
// presumably it reports whether the node has been fully handled -- confirm.
77 bool handleTextNode();
78 bool handleReplacedElement();
79 bool handleNonTextNode();
// Emits a single character together with the node and offsets it should be attributed to.
// NOTE(review): presumably used for characters that are not stored in the DOM
// (see m_singleCharacterBuffer) -- confirm.
81 void emitCharacter(QChar, DOM::NodeImpl *textNode, DOM::NodeImpl *offsetBaseNode, long textStartOffset, long textEndOffset);
83 // Current position, not necessarily of the text being returned, but position
84 // as we walk through the DOM tree.
85 DOM::NodeImpl *m_node;
// NOTE(review): presumably records that the children of m_node have already been visited -- confirm.
88 bool m_handledChildren;
// NOTE(review): by their names, the node containing the end of the range and the
// first node past the range -- confirm.
91 DOM::NodeImpl *m_endContainer;
93 DOM::NodeImpl *m_pastEndNode;
95 // The current text and its position, in the form to be returned from the iterator.
// atEnd() reports true when m_positionNode is null.
96 DOM::NodeImpl *m_positionNode;
// The next three are mutable, so const member functions are allowed to update them.
97 mutable DOM::NodeImpl *m_positionOffsetBaseNode;
98 mutable long m_positionStartOffset;
99 mutable long m_positionEndOffset;
// Start of the current chunk; returned by characters().
100 const QChar *m_textCharacters;
103 // Used when there is still some pending text from the current node; when these
104 // are false and 0, we go back to normal iterating.
// ("false" refers to m_needAnotherNewline, "0" to m_textBox.)
105 bool m_needAnotherNewline;
106 InlineTextBox *m_textBox;
108 // Used to do the whitespace collapsing logic.
109 DOM::NodeImpl *m_lastTextNode;
110 bool m_lastTextNodeEndedWithCollapsedSpace;
111 QChar m_lastCharacter;
113 // Used for whitespace characters that aren't in the DOM, so we can point at them.
114 QChar m_singleCharacterBuffer;
117 // Walks the DOM range backwards (as the class name indicates), returning all the text, and 0-length boundaries
118 // at points where replaced elements break up the text flow. The text comes back in
119 // chunks so as to optimize for performance of the iteration.
120 class SimplifiedBackwardsTextIterator
// Default constructor, plus a constructor that iterates over the given range.
123 SimplifiedBackwardsTextIterator();
124 explicit SimplifiedBackwardsTextIterator(const DOM::Range &);
// True when there is no current position node (m_positionNode is null).
126 bool atEnd() const { return !m_positionNode; }
// Length of the current chunk of text (m_textLength).
129 long length() const { return m_textLength; }
// Pointer to the current chunk of text (m_textCharacters); pair it with length().
130 const QChar *characters() const { return m_textCharacters; }
// NOTE(review): presumably the DOM range covered by the current chunk -- confirm.
132 DOM::Range range() const;
// Per-node-type handlers, one each for text nodes, replaced elements and all other nodes.
// NOTE(review): the meaning of the bool result is not visible in this header -- confirm.
136 bool handleTextNode();
137 bool handleReplacedElement();
138 bool handleNonTextNode();
// Emits a single character attributed to the given node and offset span.
139 void emitCharacter(QChar, DOM::NodeImpl *Node, long startOffset, long endOffset);
// NOTE(review): by its name, emits a newline on behalf of a BR element or a text node -- confirm.
140 void emitNewlineForBROrText();
142 // Current position, not necessarily of the text being returned, but position
143 // as we walk through the DOM tree.
144 DOM::NodeImpl *m_node;
// NOTE(review): presumably records that the children of m_node have already been visited -- confirm.
147 bool m_handledChildren;
// NOTE(review): by its name, the node at the start of the range, which is
// presumably where a backwards walk stops -- confirm.
150 DOM::NodeImpl *m_startNode;
153 // The current text and its position, in the form to be returned from the iterator.
154 DOM::NodeImpl *m_positionNode;
155 long m_positionStartOffset;
156 long m_positionEndOffset;
// Start of the current chunk; returned by characters().
157 const QChar *m_textCharacters;
160 // Used to do the whitespace logic.
161 DOM::NodeImpl *m_lastTextNode;
162 QChar m_lastCharacter;
164 // Used for whitespace characters that aren't in the DOM, so we can point at them.
165 QChar m_singleCharacterBuffer;
168 // Builds on the text iterator, adding a character position so we can walk one
169 // character at a time, or faster, as needed. Useful for searching.
170 class CharacterIterator {
// Creates a character iterator over range r.
173 explicit CharacterIterator(const DOM::Range &r);
// Moves the iterator forward by numCharacters characters.
175 void advance(long numCharacters);
// Returns m_atBreak.
// NOTE(review): presumably true when the position sits on a boundary between text runs -- confirm.
177 bool atBreak() const { return m_atBreak; }
// At the end exactly when the wrapped TextIterator is.
178 bool atEnd() const { return m_textIterator.atEnd(); }
// Characters remaining in the current run: the run's length minus the offset
// already consumed within it (m_runOffset).
180 long length() const { return m_textIterator.length() - m_runOffset; }
// Pointer to the current character: the run's text advanced by m_runOffset.
181 const QChar *characters() const { return m_textIterator.characters() + m_runOffset; }
// NOTE(review): presumably returns the next numChars characters as a QString -- confirm.
182 QString string(long numChars);
// Returns m_offset.
// NOTE(review): presumably the number of characters passed since iteration began -- confirm.
184 long characterOffset() const { return m_offset; }
// NOTE(review): presumably the DOM range of the current character position -- confirm.
185 DOM::Range range() const;
// Underlying iterator that supplies the runs of text.
192 TextIterator m_textIterator;
195 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
196 // meaning they never end in the middle of a word. This is useful for spellcheck or (perhaps one day) searching.
197 class WordAwareIterator {
// Creates a word-aware iterator over range r.
200 explicit WordAwareIterator(const DOM::Range &r);
// At the end only when no look-ahead is pending (m_didLookAhead is false) and the
// wrapped TextIterator is itself at its end.
202 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
// Pointer to the text of the current chunk.
206 const QChar *characters() const;
208 // Range of the text we're currently returning
209 DOM::Range range() const { return m_range; }
212 // text from the previous chunk from the textIterator
213 const QChar *m_previousText;
214 long m_previousLength;
216 // many chunks from textIterator concatenated
219 // Did we have to look ahead in the textIterator to confirm the current chunk?
// Underlying iterator that supplies the raw chunks.
224 TextIterator m_textIterator;