2 * Copyright (C) 2004 Apple Computer, Inc. All rights reserved.
3 * Copyright (C) 2005 Alexey Proskuryakov.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "TextIterator.h"
32 #include "HTMLNames.h"
33 #include "htmlediting.h"
34 #include "InlineTextBox.h"
37 #include "RenderTableCell.h"
38 #include "RenderTableRow.h"
44 using namespace HTMLNames;
46 const UChar nonBreakingSpace = 0xA0;
48 // Buffer that knows how to compare with a search target.
49 // Keeps enough of the previous text to be able to search in the future,
51 class CircularSearchBuffer {
53 CircularSearchBuffer(const String& target, bool isCaseSensitive);
54 ~CircularSearchBuffer() { fastFree(m_buffer); }
56 void clear() { m_cursor = m_buffer; m_bufferFull = false; }
57 void append(int length, const UChar* characters);
60 int neededCharacters() const;
62 int length() const { return m_target.length(); }
66 bool m_isCaseSensitive;
72 CircularSearchBuffer(const CircularSearchBuffer&);
73 CircularSearchBuffer &operator=(const CircularSearchBuffer&);
76 TextIterator::TextIterator() : m_endContainer(0), m_endOffset(0), m_positionNode(0), m_lastCharacter(0)
80 TextIterator::TextIterator(const Range *r, IteratorKind kind) : m_endContainer(0), m_endOffset(0), m_positionNode(0)
87 // get and validate the range endpoints
88 Node *startContainer = r->startContainer(ec);
89 int startOffset = r->startOffset(ec);
90 Node *endContainer = r->endContainer(ec);
91 int endOffset = r->endOffset(ec);
95 // remember ending place - this does not change
96 m_endContainer = endContainer;
97 m_endOffset = endOffset;
99 // set up the current node for processing
100 m_node = r->startNode();
103 m_offset = m_node == startContainer ? startOffset : 0;
104 m_handledNode = false;
105 m_handledChildren = false;
107 // calculate first out of bounds node
108 m_pastEndNode = r->pastEndNode();
110 // initialize node processing state
111 m_needAnotherNewline = false;
114 // initialize record of previous node processing
116 m_lastTextNodeEndedWithCollapsedSpace = false;
117 if (kind == RUNFINDER)
120 m_lastCharacter = '\n';
123 // need this just because of the assert in advance()
124 m_positionNode = m_node;
127 // identify the first run
131 void TextIterator::advance()
133 // reset the run information
137 // handle remembered node that needed a newline after the text node's newline
138 if (m_needAnotherNewline) {
139 // emit the newline, with position a collapsed range at the end of current node.
140 emitCharacter('\n', m_node->parentNode(), m_node, 1, 1);
141 m_needAnotherNewline = false;
145 // handle remembered text box
152 while (m_node && m_node != m_pastEndNode && !(m_node == m_endContainer && m_endOffset == 0)) {
153 // handle current node according to its type
154 if (!m_handledNode) {
155 RenderObject *renderer = m_node->renderer();
156 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
157 // FIXME: What about CDATA_SECTION_NODE?
158 if (renderer->style()->visibility() == VISIBLE)
159 m_handledNode = handleTextNode();
160 } else if (renderer && (renderer->isImage() || renderer->isWidget() || (renderer->element() && renderer->element()->isControl()))) {
161 if (renderer->style()->visibility() == VISIBLE)
162 m_handledNode = handleReplacedElement();
164 m_handledNode = handleNonTextNode();
169 // find a new current node to handle in depth-first manner,
170 // calling exitNode() as we come back thru a parent node
171 Node *next = m_handledChildren ? 0 : m_node->firstChild();
174 next = m_node->nextSibling();
176 if (m_node->traverseNextNode() == m_pastEndNode)
178 while (!next && m_node->parentNode()) {
179 m_node = m_node->parentNode();
181 if (m_positionNode) {
182 m_handledNode = true;
183 m_handledChildren = true;
186 next = m_node->nextSibling();
191 // set the new current node
193 m_handledNode = false;
194 m_handledChildren = false;
196 // how would this ever be?
202 static inline bool compareBoxStart(const InlineTextBox *first, const InlineTextBox *second)
204 return first->start() < second->start();
207 bool TextIterator::handleTextNode()
209 m_lastTextNode = m_node;
211 RenderText *renderer = static_cast<RenderText *>(m_node->renderer());
212 String str = renderer->string();
214 // handle pre-formatted text
215 if (!renderer->style()->collapseWhiteSpace()) {
216 int runStart = m_offset;
217 if (m_lastTextNodeEndedWithCollapsedSpace) {
218 emitCharacter(' ', m_node, 0, runStart, runStart);
221 int strLength = str.length();
222 int end = (m_node == m_endContainer) ? m_endOffset : LONG_MAX;
223 int runEnd = min(strLength, end);
225 if (runStart >= runEnd)
228 m_positionNode = m_node;
229 m_positionOffsetBaseNode = 0;
230 m_positionStartOffset = runStart;
231 m_positionEndOffset = runEnd;
232 m_textCharacters = str.characters() + runStart;
233 m_textLength = runEnd - runStart;
235 m_lastCharacter = str[runEnd - 1];
240 if (!renderer->firstTextBox() && str.length() > 0) {
241 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
245 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
246 if (renderer->containsReversedText()) {
247 m_sortedTextBoxes.clear();
248 for (InlineTextBox * textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
249 m_sortedTextBoxes.append(textBox);
251 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), compareBoxStart);
252 m_sortedTextBoxesPosition = 0;
255 m_textBox = renderer->containsReversedText() ? m_sortedTextBoxes[0] : renderer->firstTextBox();
260 void TextIterator::handleTextBox()
262 RenderText *renderer = static_cast<RenderText *>(m_node->renderer());
263 String str = renderer->string();
264 int start = m_offset;
265 int end = (m_node == m_endContainer) ? m_endOffset : LONG_MAX;
267 int textBoxStart = m_textBox->m_start;
268 int runStart = max(textBoxStart, start);
270 // Check for collapsed space at the start of this run.
271 InlineTextBox *firstTextBox = renderer->containsReversedText() ? m_sortedTextBoxes[0] : renderer->firstTextBox();
272 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
273 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
274 if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) {
275 emitCharacter(' ', m_node, 0, runStart, runStart);
278 int textBoxEnd = textBoxStart + m_textBox->m_len;
279 int runEnd = min(textBoxEnd, end);
281 // Determine what the next text box will be, but don't advance yet
282 InlineTextBox *nextTextBox = 0;
283 if (renderer->containsReversedText()) {
284 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
285 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
287 nextTextBox = m_textBox->nextTextBox();
289 if (runStart < runEnd) {
290 // Handle either a single newline character (which becomes a space),
291 // or a run of characters that does not include a newline.
292 // This effectively translates newlines to spaces without copying the text.
293 if (str[runStart] == '\n') {
294 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
295 m_offset = runStart + 1;
297 int subrunEnd = str.find('\n', runStart);
298 if (subrunEnd == -1 || subrunEnd > runEnd)
301 m_offset = subrunEnd;
303 m_positionNode = m_node;
304 m_positionOffsetBaseNode = 0;
305 m_positionStartOffset = runStart;
306 m_positionEndOffset = subrunEnd;
307 m_textCharacters = str.characters() + runStart;
308 m_textLength = subrunEnd - runStart;
310 m_lastTextNodeEndedWithCollapsedSpace = false;
311 m_lastCharacter = str[subrunEnd - 1];
314 // If we are doing a subrun that doesn't go to the end of the text box,
315 // come back again to finish handling this text box; don't advance to the next one.
316 if (m_positionEndOffset < textBoxEnd)
319 // Advance and return
320 int nextRunStart = nextTextBox ? nextTextBox->m_start : str.length();
321 if (nextRunStart > runEnd)
322 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
323 m_textBox = nextTextBox;
324 if (renderer->containsReversedText())
325 ++m_sortedTextBoxesPosition;
328 // Advance and continue
329 m_textBox = nextTextBox;
330 if (renderer->containsReversedText())
331 ++m_sortedTextBoxesPosition;
335 bool TextIterator::handleReplacedElement()
337 if (m_lastTextNodeEndedWithCollapsedSpace) {
338 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
342 m_positionNode = m_node->parentNode();
343 m_positionOffsetBaseNode = m_node;
344 m_positionStartOffset = 0;
345 m_positionEndOffset = 1;
347 m_textCharacters = 0;
355 static bool isTableCell(Node* node)
357 RenderObject* r = node->renderer();
359 return node->hasTagName(tdTag) || node->hasTagName(thTag);
361 return r->isTableCell();
364 static bool shouldEmitTabBeforeNode(Node* node)
366 RenderObject* r = node->renderer();
368 // Table cells are delimited by tabs.
369 if (!r || !isTableCell(node))
372 // Want a tab before every cell other than the first one
373 RenderTableCell* rc = static_cast<RenderTableCell*>(r);
374 RenderTable* t = rc->table();
375 return t && (t->cellBefore(rc) || t->cellAbove(rc));
378 static bool shouldEmitSpaceBeforeAndAfterNode(Node* node)
380 RenderObject* r = node->renderer();
382 return r && r->isTable() && r->isInline();
385 static bool shouldEmitNewlineForNode(Node* node)
387 // br elements are represented by a single newline.
388 RenderObject* r = node->renderer();
390 return node->hasTagName(brTag);
395 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node)
397 // Block flow (versus inline flow) is represented by having
398 // a newline both before and after the element.
399 RenderObject* r = node->renderer();
401 return (node->hasTagName(blockquoteTag)
402 || node->hasTagName(ddTag)
403 || node->hasTagName(divTag)
404 || node->hasTagName(dlTag)
405 || node->hasTagName(dtTag)
406 || node->hasTagName(h1Tag)
407 || node->hasTagName(h2Tag)
408 || node->hasTagName(h3Tag)
409 || node->hasTagName(h4Tag)
410 || node->hasTagName(h5Tag)
411 || node->hasTagName(h6Tag)
412 || node->hasTagName(hrTag)
413 || node->hasTagName(liTag)
414 || node->hasTagName(listingTag)
415 || node->hasTagName(olTag)
416 || node->hasTagName(pTag)
417 || node->hasTagName(preTag)
418 || node->hasTagName(trTag)
419 || node->hasTagName(ulTag));
422 // Need to make an exception for table cells, because they are blocks, but we
423 // want them tab-delimited rather than having newlines before and after.
424 if (isTableCell(node))
427 // Need to make an exception for table row elements, because they are neither
428 // "inline" or "RenderBlock", but we want newlines for them.
429 if (r->isTableRow()) {
430 RenderTable* t = static_cast<RenderTableRow*>(r)->table();
431 if (t && !t->isInline())
435 // Check for non-inline block
436 return !r->isInline() && r->isRenderBlock() && !r->isBody();
439 static bool shouldEmitExtraNewlineForNode(Node* node)
441 // When there is a significant collapsed bottom margin, emit an extra
442 // newline for a more realistic result. We end up getting the right
443 // result even without margin collapsing. For example: <div><p>text</p></div>
444 // will work right even if both the <div> and the <p> have bottom margins.
445 RenderObject* r = node->renderer();
449 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
450 // not to do this at all
451 if (node->hasTagName(h1Tag)
452 || node->hasTagName(h2Tag)
453 || node->hasTagName(h3Tag)
454 || node->hasTagName(h4Tag)
455 || node->hasTagName(h5Tag)
456 || node->hasTagName(h6Tag)
457 || node->hasTagName(pTag)) {
458 RenderStyle* style = r->style();
460 int bottomMargin = r->collapsedMarginBottom();
461 int fontSize = style->fontDescription().computedPixelSize();
462 if (bottomMargin * 2 >= fontSize)
470 bool TextIterator::handleNonTextNode()
472 if (shouldEmitNewlineForNode(m_node)) {
473 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
474 } else if (m_lastCharacter != '\n' && m_lastTextNode) {
475 // only add the tab or newline if not at the start of a line
476 if (shouldEmitTabBeforeNode(m_node))
477 emitCharacter('\t', m_lastTextNode->parentNode(), m_lastTextNode, 0, 1);
478 else if (shouldEmitNewlinesBeforeAndAfterNode(m_node))
479 emitCharacter('\n', m_lastTextNode->parentNode(), m_lastTextNode, 0, 1);
480 else if (shouldEmitSpaceBeforeAndAfterNode(m_node))
481 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 0, 1);
487 void TextIterator::exitNode()
489 if (m_lastTextNode && shouldEmitNewlinesBeforeAndAfterNode(m_node)) {
490 // use extra newline to represent margin bottom, as needed
491 bool addNewline = shouldEmitExtraNewlineForNode(m_node);
493 if (m_lastCharacter != '\n') {
494 // insert a newline with a position following this block
495 emitCharacter('\n', m_node->parentNode(), m_node, 1, 1);
497 // remember whether to later add a newline for the current node
498 assert(!m_needAnotherNewline);
499 m_needAnotherNewline = addNewline;
500 } else if (addNewline) {
501 // insert a newline with a position following this block
502 emitCharacter('\n', m_node->parentNode(), m_node, 1, 1);
504 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node))
505 emitCharacter(' ', m_node->parentNode(), m_node, 1, 1);
508 void TextIterator::emitCharacter(UChar c, Node *textNode, Node *offsetBaseNode, int textStartOffset, int textEndOffset)
510 // remember information with which to construct the TextIterator::range()
511 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
512 m_positionNode = textNode;
513 m_positionOffsetBaseNode = offsetBaseNode;
514 m_positionStartOffset = textStartOffset;
515 m_positionEndOffset = textEndOffset;
517 // remember information with which to construct the TextIterator::characters() and length()
518 m_singleCharacterBuffer = c;
519 m_textCharacters = &m_singleCharacterBuffer;
522 // remember some iteration state
523 m_lastTextNodeEndedWithCollapsedSpace = false;
527 PassRefPtr<Range> TextIterator::range() const
529 // use the current run information, if we have it
530 if (m_positionNode) {
531 if (m_positionOffsetBaseNode) {
532 int index = m_positionOffsetBaseNode->nodeIndex();
533 m_positionStartOffset += index;
534 m_positionEndOffset += index;
535 m_positionOffsetBaseNode = 0;
537 return new Range(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
540 // otherwise, return the end of the overall range we were given
542 return new Range(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
547 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator() : m_positionNode(0)
551 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range *r)
559 Node *startNode = r->startContainer(exception);
562 Node *endNode = r->endContainer(exception);
565 int startOffset = r->startOffset(exception);
568 int endOffset = r->endOffset(exception);
572 if (!startNode->offsetInCharacters()) {
573 if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
574 startNode = startNode->childNode(startOffset);
578 if (!endNode->offsetInCharacters()) {
579 if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) {
580 endNode = endNode->childNode(endOffset - 1);
581 endOffset = endNode->hasChildNodes() ? endNode->childNodeCount() : endNode->maxOffset();
586 m_offset = endOffset;
587 m_handledNode = false;
588 m_handledChildren = endOffset == 0;
590 m_startNode = startNode;
591 m_startOffset = startOffset;
594 // Need this just because of the assert.
595 m_positionNode = endNode;
599 m_lastCharacter = '\n';
604 void SimplifiedBackwardsTextIterator::advance()
606 assert(m_positionNode);
612 if (!m_handledNode) {
613 RenderObject *renderer = m_node->renderer();
614 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
615 // FIXME: What about CDATA_SECTION_NODE?
616 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
617 m_handledNode = handleTextNode();
618 } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
619 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
620 m_handledNode = handleReplacedElement();
622 m_handledNode = handleNonTextNode();
627 if (m_node == m_startNode)
631 if (!m_handledChildren) {
632 next = m_node->lastChild();
633 while (next && next->lastChild())
634 next = next->lastChild();
635 m_handledChildren = true;
637 if (!next && m_node != m_startNode) {
638 next = m_node->previousSibling();
641 while (next->lastChild())
642 next = next->lastChild();
644 else if (m_node->parentNode()) {
645 next = m_node->parentNode();
650 // Check for leaving a node and iterating backwards
651 // into a different block that is an descendent of the
652 // block containing the node (as in leaving
653 // the "bar" node in this example: <p>foo</p>bar).
654 // Must emit newline when leaving node containing "bar".
655 if (next && m_node->renderer() && m_node->renderer()->style()->visibility() == VISIBLE) {
656 Node *block = m_node->enclosingBlockFlowElement();
658 Node *nextBlock = next->enclosingBlockFlowElement();
659 if (nextBlock && nextBlock->isAncestor(block))
666 m_offset = m_node->caretMaxOffset();
669 m_handledNode = false;
676 bool SimplifiedBackwardsTextIterator::handleTextNode()
678 m_lastTextNode = m_node;
680 RenderText *renderer = static_cast<RenderText *>(m_node->renderer());
681 String str = renderer->string();
683 if (!renderer->firstTextBox() && str.length() > 0)
686 m_positionEndOffset = m_offset;
688 m_offset = (m_node == m_startNode) ? m_startOffset : 0;
689 m_positionNode = m_node;
690 m_positionStartOffset = m_offset;
691 m_textLength = m_positionEndOffset - m_positionStartOffset;
692 m_textCharacters = str.characters() + m_positionStartOffset;
694 m_lastCharacter = str[m_positionEndOffset - 1];
699 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
701 int offset = m_node->nodeIndex();
703 m_positionNode = m_node->parentNode();
704 m_positionStartOffset = offset;
705 m_positionEndOffset = offset + 1;
707 m_textCharacters = 0;
715 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
717 // We can use a linefeed in place of a tab because this simple iterator is only used to
718 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs.
719 if (shouldEmitNewlineForNode(m_node) ||
720 shouldEmitNewlinesBeforeAndAfterNode(m_node) ||
721 shouldEmitTabBeforeNode(m_node))
727 void SimplifiedBackwardsTextIterator::exitNode()
729 // Left this function in so that the name and usage patterns remain similar to
730 // TextIterator. However, the needs of this iterator are much simpler, and
731 // the handleNonTextNode() function does just what we want (i.e. insert a
732 // BR for certain node types to "break up" text so that it does not seem
733 // like content is next to other text when it really isn't).
737 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node *node, int startOffset, int endOffset)
739 m_singleCharacterBuffer = c;
740 m_positionNode = node;
741 m_positionStartOffset = startOffset;
742 m_positionEndOffset = endOffset;
743 m_textCharacters = &m_singleCharacterBuffer;
748 void SimplifiedBackwardsTextIterator::emitNewline()
752 if (m_lastTextNode) {
753 offset = m_lastTextNode->nodeIndex();
754 emitCharacter('\n', m_lastTextNode->parentNode(), offset, offset + 1);
756 offset = m_node->nodeIndex();
757 emitCharacter('\n', m_node->parentNode(), offset, offset + 1);
761 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const
764 return new Range(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
766 return new Range(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
769 CharacterIterator::CharacterIterator()
770 : m_offset(0), m_runOffset(0), m_atBreak(true)
774 CharacterIterator::CharacterIterator(const Range *r)
775 : m_offset(0), m_runOffset(0), m_atBreak(true), m_textIterator(r, RUNFINDER)
777 while (!atEnd() && m_textIterator.length() == 0) {
778 m_textIterator.advance();
782 PassRefPtr<Range> CharacterIterator::range() const
784 RefPtr<Range> r = m_textIterator.range();
785 if (!m_textIterator.atEnd()) {
786 if (m_textIterator.length() <= 1) {
787 assert(m_runOffset == 0);
790 Node *n = r->startContainer(exception);
791 assert(n == r->endContainer(exception));
792 int offset = r->startOffset(exception) + m_runOffset;
793 r->setStart(n, offset, exception);
794 r->setEnd(n, offset + 1, exception);
800 void CharacterIterator::advance(int count)
804 // easy if there is enough left in the current m_textIterator run
805 int remaining = m_textIterator.length() - m_runOffset;
806 if (count < remaining) {
807 m_runOffset += count;
812 // exhaust the current m_textIterator run
814 m_offset += remaining;
816 // move to a subsequent m_textIterator run
817 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
818 int runLength = m_textIterator.length();
822 // see whether this is m_textIterator to use
823 if (count < runLength) {
829 // exhaust this m_textIterator run
831 m_offset += runLength;
835 // ran to the end of the m_textIterator... no more runs left
840 DeprecatedString CharacterIterator::string(int numChars)
842 DeprecatedString result;
843 result.reserve(numChars);
844 while (numChars > 0 && !atEnd()) {
845 int runSize = min(numChars, length());
846 result.append(reinterpret_cast<const DeprecatedChar*>(characters()), runSize);
853 WordAwareIterator::WordAwareIterator()
854 : m_previousText(0), m_didLookAhead(false)
858 WordAwareIterator::WordAwareIterator(const Range *r)
859 : m_previousText(0), m_didLookAhead(false), m_textIterator(r)
861 m_didLookAhead = true; // so we consider the first chunk from the text iterator
862 advance(); // get in position over the first chunk of text
865 // We're always in one of these modes:
866 // - The current chunk in the text iterator is our current chunk
867 // (typically its a piece of whitespace, or text that ended with whitespace)
868 // - The previous chunk in the text iterator is our current chunk
869 // (we looked ahead to the next chunk and found a word boundary)
870 // - We built up our own chunk of text from many chunks from the text iterator
872 //FIXME: Perf could be bad for huge spans next to each other that don't fall on word boundaries
874 void WordAwareIterator::advance()
877 m_buffer = ""; // toss any old buffer we built up
879 // If last time we did a look-ahead, start with that looked-ahead chunk now
880 if (!m_didLookAhead) {
881 assert(!m_textIterator.atEnd());
882 m_textIterator.advance();
884 m_didLookAhead = false;
886 // Go to next non-empty chunk
887 while (!m_textIterator.atEnd() && m_textIterator.length() == 0) {
888 m_textIterator.advance();
890 m_range = m_textIterator.range();
892 if (m_textIterator.atEnd())
896 // If this chunk ends in whitespace we can just use it as our chunk.
897 if (DeprecatedChar(m_textIterator.characters()[m_textIterator.length() - 1]).isSpace())
900 // If this is the first chunk that failed, save it in previousText before look ahead
901 if (m_buffer.isEmpty()) {
902 m_previousText = m_textIterator.characters();
903 m_previousLength = m_textIterator.length();
906 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
907 m_textIterator.advance();
908 if (m_textIterator.atEnd() || m_textIterator.length() == 0 || DeprecatedChar(m_textIterator.characters()[0]).isSpace()) {
909 m_didLookAhead = true;
913 if (m_buffer.isEmpty()) {
914 // Start gobbling chunks until we get to a suitable stopping point
915 m_buffer.append(reinterpret_cast<const DeprecatedChar*>(m_previousText), m_previousLength);
918 m_buffer.append(reinterpret_cast<const DeprecatedChar*>(m_textIterator.characters()), m_textIterator.length());
920 m_range->setEnd(m_textIterator.range()->endContainer(exception), m_textIterator.range()->endOffset(exception), exception);
924 int WordAwareIterator::length() const
926 if (!m_buffer.isEmpty())
927 return m_buffer.length();
929 return m_previousLength;
930 return m_textIterator.length();
933 const UChar* WordAwareIterator::characters() const
935 if (!m_buffer.isEmpty())
936 return reinterpret_cast<const UChar*>(m_buffer.unicode());
938 return m_previousText;
939 return m_textIterator.characters();
942 CircularSearchBuffer::CircularSearchBuffer(const String& s, bool isCaseSensitive)
945 assert(!s.isEmpty());
947 if (!isCaseSensitive)
948 m_target = s.foldCase();
949 m_target.replace(nonBreakingSpace, ' ');
950 m_isCaseSensitive = isCaseSensitive;
952 m_buffer = static_cast<UChar*>(fastMalloc(s.length() * sizeof(UChar)));
954 m_bufferFull = false;
957 void CircularSearchBuffer::append(UChar c)
959 if (m_isCaseSensitive)
960 *m_cursor++ = c == nonBreakingSpace ? ' ' : c;
962 *m_cursor++ = c == nonBreakingSpace ? ' ' : u_foldCase(c, U_FOLD_CASE_DEFAULT);
963 if (m_cursor == m_buffer + length()) {
969 // This function can only be used when the buffer is not yet full,
970 // and when then count is small enough to fit in the buffer.
971 // No need for a more general version for the search algorithm.
972 void CircularSearchBuffer::append(int count, const UChar* characters)
974 int tailSpace = m_buffer + length() - m_cursor;
976 assert(!m_bufferFull);
977 assert(count <= tailSpace);
979 if (m_isCaseSensitive) {
980 for (int i = 0; i != count; ++i) {
981 UChar c = characters[i];
982 m_cursor[i] = c == nonBreakingSpace ? ' ' : c;
985 for (int i = 0; i != count; ++i) {
986 UChar c = characters[i];
987 m_cursor[i] = c == nonBreakingSpace ? ' ' : u_foldCase(c, U_FOLD_CASE_DEFAULT);
990 if (count < tailSpace)
998 int CircularSearchBuffer::neededCharacters() const
1000 return m_bufferFull ? 0 : m_buffer + length() - m_cursor;
1003 bool CircularSearchBuffer::isMatch() const
1005 assert(m_bufferFull);
1007 int headSpace = m_cursor - m_buffer;
1008 int tailSpace = length() - headSpace;
1009 return memcmp(m_cursor, m_target.characters(), tailSpace * sizeof(UChar)) == 0
1010 && memcmp(m_buffer, m_target.characters() + tailSpace, headSpace * sizeof(UChar)) == 0;
1013 int TextIterator::rangeLength(const Range *r)
1016 for (TextIterator it(r); !it.atEnd(); it.advance()) {
1017 length += it.length();
1022 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Document *doc, int rangeLocation, int rangeLength)
1024 RefPtr<Range> resultRange = doc->createRange();
1026 int docTextPosition = 0;
1027 int rangeEnd = rangeLocation + rangeLength;
1028 bool startRangeFound = false;
1030 RefPtr<Range> textRunRange;
1032 TextIterator it(rangeOfContents(doc).get());
1034 // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugzilla.opendarwin.org/show_bug.cgi?id=6289>.
1035 if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {
1037 textRunRange = it.range();
1039 resultRange->setStart(textRunRange->startContainer(exception), 0, exception);
1040 assert(exception == 0);
1041 resultRange->setEnd(textRunRange->startContainer(exception), 0, exception);
1042 assert(exception == 0);
1044 return resultRange.release();
1047 for (; !it.atEnd(); it.advance()) {
1048 int len = it.length();
1049 textRunRange = it.range();
1051 if (rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len) {
1052 startRangeFound = true;
1054 if (textRunRange->startContainer(exception)->isTextNode()) {
1055 int offset = rangeLocation - docTextPosition;
1056 resultRange->setStart(textRunRange->startContainer(exception), offset + textRunRange->startOffset(exception), exception);
1058 if (rangeLocation == docTextPosition)
1059 resultRange->setStart(textRunRange->startContainer(exception), textRunRange->startOffset(exception), exception);
1061 resultRange->setStart(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception);
1065 if (rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len) {
1067 if (textRunRange->startContainer(exception)->isTextNode()) {
1068 int offset = rangeEnd - docTextPosition;
1069 resultRange->setEnd(textRunRange->startContainer(exception), offset + textRunRange->startOffset(exception), exception);
1071 if (rangeEnd == docTextPosition)
1072 resultRange->setEnd(textRunRange->startContainer(exception), textRunRange->startOffset(exception), exception);
1074 resultRange->setEnd(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception);
1076 if (!(rangeLength == 0 && rangeEnd == docTextPosition + len)) {
1077 docTextPosition += len;
1081 docTextPosition += len;
1084 if (!startRangeFound)
1087 if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds
1089 resultRange->setEnd(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception);
1092 return resultRange.release();
1095 DeprecatedString plainText(const Range* r)
1097 DeprecatedString result("");
1098 for (TextIterator it(r); !it.atEnd(); it.advance())
1099 result.append(reinterpret_cast<const DeprecatedChar*>(it.characters()), it.length());
1103 PassRefPtr<Range> findPlainText(const Range *r, const String& s, bool forward, bool caseSensitive)
1105 // FIXME: Can we do Boyer-Moore or equivalent instead for speed?
1107 // FIXME: This code does not allow \n at the moment because of issues with <br>.
1108 // Once we fix those, we can remove this check.
1109 if (s.isEmpty() || s.find('\n') != -1) {
1111 RefPtr<Range> result = r->cloneRange(exception);
1112 result->collapse(forward, exception);
1113 return result.release();
1116 CircularSearchBuffer buffer(s, caseSensitive);
1119 CharacterIterator rangeEnd;
1122 CharacterIterator it(r);
1125 while (int needed = buffer.neededCharacters()) {
1131 int available = it.length();
1132 int runLength = min(needed, available);
1133 buffer.append(runLength, it.characters());
1134 it.advance(runLength);
1139 if (buffer.isMatch()) {
1140 // Compute the range for the result.
1143 // If searching forward, stop on the first match.
1144 // If searching backward, don't stop, so we end up with the last match.
1150 buffer.append(it.characters()[0]);
1159 RefPtr<Range> result = r->cloneRange(exception);
1161 result->collapse(!forward, exception);
1163 CharacterIterator it(r);
1164 it.advance(rangeEnd.characterOffset() - buffer.length());
1165 result->setStart(it.range()->startContainer(exception), it.range()->startOffset(exception), exception);
1166 it.advance(buffer.length() - 1);
1167 result->setEnd(it.range()->endContainer(exception), it.range()->endOffset(exception), exception);
1169 return result.release();