2 This file is part of the KDE libraries
4 Copyright (C) 1997 Martin Jones (mjones@kde.org)
5 (C) 1997 Torben Weis (weis@kde.org)
6 (C) 1999,2001 Lars Knoll (knoll@kde.org)
7 (C) 2000,2001 Dirk Mueller (mueller@kde.org)
8 Copyright (C) 2004 Apple Computer, Inc.
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Library General Public
12 License as published by the Free Software Foundation; either
13 version 2 of the License, or (at your option) any later version.
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Library General Public License for more details.
20 You should have received a copy of the GNU Library General Public License
21 along with this library; see the file COPYING.LIB. If not, write to
22 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 Boston, MA 02111-1307, USA.
25 //----------------------------------------------------------------------------
27 // KDE HTML Widget -- HTML Parser
28 //#define PARSER_DEBUG
30 #include "html/htmlparser.h"
32 #include "dom/dom_exception.h"
34 #include "html/html_baseimpl.h"
35 #include "html/html_blockimpl.h"
36 #include "html/html_canvasimpl.h"
37 #include "html/html_documentimpl.h"
38 #include "html/html_elementimpl.h"
39 #include "html/html_formimpl.h"
40 #include "html/html_headimpl.h"
41 #include "html/html_imageimpl.h"
42 #include "html/html_inlineimpl.h"
43 #include "html/html_listimpl.h"
44 #include "html/html_miscimpl.h"
45 #include "html/html_tableimpl.h"
46 #include "html/html_objectimpl.h"
47 #include "xml/dom_textimpl.h"
48 #include "xml/dom_nodeimpl.h"
49 #include "misc/htmlhashes.h"
50 #include "html/htmltokenizer.h"
51 #include "khtmlview.h"
52 #include "khtml_part.h"
53 #include "css/cssproperties.h"
54 #include "css/cssvalues.h"
56 #include "rendering/render_object.h"
62 using namespace khtml;
64 //----------------------------------------------------------------------------
72 HTMLStackElem( int _id,
80 strayTableContent(false),
87 bool strayTableContent;
95 * The parser parses tokenized input into the document, building up the
96 * document tree. If the document is wellformed, parsing it is
98 * Unfortunately, people can't write wellformed HTML documents, so the parser
99 * has to be tolerant about errors.
101 * We have to take care of the following error conditions:
102 * 1. The element being added is explicitly forbidden inside some outer tag.
103 * In this case we should close all tags up to the one that forbids
104 * the element, and add it afterwards.
105 * 2. We are not allowed to add the element directly. It could be that
106 * the person writing the document forgot some tag in between (or that the
107 * tag in between is optional...) This could be the case with the following
108 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
109 * 3. We want to add a block element inside an inline element. Close all
110 * inline elements up to the next higher block element.
111 * 4. If this doesn't help, close elements until we are allowed to add the
112 * element, or ignore the tag.
115 KHTMLParser::KHTMLParser(KHTMLView *_parent, DocumentPtr *doc, bool includesComments)
116 : current(0), currentIsReferenced(false), includesCommentsInDOM(includesComments)
118 //kdDebug( 6035 ) << "parser constructor" << endl;
123 HTMLWidget = _parent;
132 KHTMLParser::KHTMLParser(DOM::DocumentFragmentImpl *i, DocumentPtr *doc, bool includesComments)
133 : current(0), currentIsReferenced(false), includesCommentsInDOM(includesComments)
146 KHTMLParser::~KHTMLParser()
149 kdDebug( ) << "TIME: parsing time was = " << qt.elapsed() << endl;
162 void KHTMLParser::reset()
168 // before parsing, no tags are forbidden
169 memset(forbiddenTag, 0, sizeof(forbiddenTag));
172 haveFrameSet = false;
175 inStrayTableContent = 0;
186 void KHTMLParser::setCurrent(DOM::NodeImpl *newCurrent)
188 bool newCurrentIsReferenced = newCurrent && newCurrent != doc();
189 if (newCurrentIsReferenced)
191 if (currentIsReferenced)
193 current = newCurrent;
194 currentIsReferenced = newCurrentIsReferenced;
197 void KHTMLParser::parseToken(Token *t)
200 if(t->id == discard_until)
203 // do not skip </iframe>
204 if ( discard_until || current->id() + ID_CLOSE_TAG != t->id )
209 kdDebug( 6035 ) << "\n\n==> parser: processing token " << getTagName(t->id).string() << "(" << t->id << ")"
210 << " current = " << getTagName(current->id()).string() << "(" << current->id() << ")" << endl;
211 kdDebug(6035) << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << endl;
214 // holy shit. apparently some sites use </br> instead of <br>
215 // be compatible with IE and NS
216 if (t->id == ID_BR + ID_CLOSE_TAG && doc()->inCompatMode())
219 if (t->id > ID_CLOSE_TAG)
225 // ignore spaces, if we're not inside a paragraph or other inline code
226 if( t->id == ID_TEXT && t->text ) {
227 if(inBody && !skipMode() && current->id() != ID_STYLE
228 && current->id() != ID_TITLE && current->id() != ID_SCRIPT &&
229 !t->text->containsOnlyWhitespace())
232 kdDebug(6035) << "length="<< t->text->l << " text='" << QConstString(t->text->s, t->text->l).string() << "'" << endl;
236 NodeImpl *n = getElement(t);
237 // just to be sure, and to catch currently unimplemented stuff
244 if(n->isElementNode())
246 ElementImpl *e = static_cast<ElementImpl *>(n);
247 e->setAttributeMap(t->attrs);
249 // take care of optional close tags
250 if(endTag[e->id()] == DOM::OPTIONAL)
253 if (isHeaderTag(t->id))
254 // Do not allow two header tags to be nested if the intervening tags are inlines.
255 popNestedHeaderTag();
258 // if this tag is forbidden inside the current context, pop
259 // blocks until we are allowed to add it...
260 while (t->id <= ID_LAST_TAG && forbiddenTag[t->id]) {
262 kdDebug( 6035 ) << "t->id: " << t->id << " is forbidden :-( " << endl;
267 if (!insertNode(n, t->flat))
269 // we couldn't insert the node...
271 if(n->isElementNode())
273 ElementImpl *e = static_cast<ElementImpl *>(n);
274 e->setAttributeMap(0);
278 kdDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!" << endl;
283 kdDebug( 6035 ) << " --> resetting map!" << endl;
290 kdDebug( 6035 ) << " --> resetting form!" << endl;
297 static bool isTableRelatedTag(int id)
299 return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD ||
303 bool KHTMLParser::insertNode(NodeImpl *n, bool flat)
309 // let's be stupid and just try to insert it.
310 // this should work if the document is wellformed
312 NodeImpl *tmp = current;
314 NodeImpl *newNode = current->addChild(n);
317 kdDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string() << endl;
319 // don't push elements without end tag on the stack
320 if(tagPriority[id] != 0 && !flat)
322 pushBlock(id, tagPriority[id]);
323 if (newNode == current)
328 if(!n->attached() && HTMLWidget)
334 if(!n->attached() && HTMLWidget)
336 if (n->maintainsState()) {
337 doc()->registerMaintainsState(n);
338 QStringList &states = doc()->restoreState();
339 if (!states.isEmpty())
340 n->restoreState(states);
348 kdDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string() << endl;
352 bool handled = false;
354 // switch according to the element to insert
360 if (inStrayTableContent && !isTableRelatedTag(current->id())) {
361 // pop out to the nearest enclosing table-related tag.
362 while (!isTableRelatedTag(current->id()))
364 return insertNode(n);
370 // ### allow not having <HTML> at all, as per HTML spec
371 if (!current->isDocumentNode() && current->id() != ID_HTML )
374 // We can deal with a base, meta and link element in the body, by just adding the element to head.
381 if ( head->addChild(n) ) {
383 if(!n->attached() && HTMLWidget)
393 if (!current->isDocumentNode() ) {
394 if ( doc()->firstChild()->id() == ID_HTML) {
395 // we have another <HTML> element.... apply attributes to existing one
396 // make sure we don't overwrite already existing attributes
397 NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);
398 NamedAttrMapImpl *bmap = static_cast<ElementImpl*>(doc()->firstChild())->attributes(false);
399 bool changed = false;
400 for (unsigned long l = 0; map && l < map->length(); ++l) {
401 AttributeImpl* it = map->attributeItem(l);
402 changed = !bmap->getAttributeItem(it->id());
403 bmap->insertAttribute(it->clone(false));
406 doc()->recalcStyle( NodeImpl::Inherit );
416 DOM::NodeImpl *newNode = head->addChild(n);
418 pushBlock(id, tagPriority[id]);
421 if(!n->attached() && HTMLWidget)
426 kdDebug( 6035 ) << "adding style before to body failed!!!!" << endl;
428 discard_until = ID_STYLE + ID_CLOSE_TAG;
433 discard_until = ID_STYLE + ID_CLOSE_TAG;
437 // SCRIPT and OBJECT are allowed in the body.
439 if(inBody && doc()->body()) {
440 // we have another <BODY> element.... apply attributes to existing one
441 // make sure we don't overwrite already existing attributes
442 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
443 NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);
444 NamedAttrMapImpl *bmap = doc()->body()->attributes(false);
445 bool changed = false;
446 for (unsigned long l = 0; map && l < map->length(); ++l) {
447 AttributeImpl* it = map->attributeItem(l);
448 changed = !bmap->getAttributeItem(it->id());
449 bmap->insertAttribute(it->clone(false));
452 doc()->recalcStyle( NodeImpl::Inherit );
453 } else if ( current->isDocumentNode() )
458 // the following is a hack to move non rendered elements
459 // outside of tables.
460 // needed for broken constructs like <table><form ...><tr>....
463 ElementImpl *e = static_cast<ElementImpl *>(n);
464 DOMString type = e->getAttribute(ATTR_TYPE);
466 if ( strcasecmp( type, "hidden" ) == 0 && form) {
469 if(!n->attached() && HTMLWidget)
477 // ignore text inside the following elements.
478 switch(current->id())
489 e = new HTMLDListElementImpl(document);
490 if ( insertNode(e) ) {
501 if(!n->attached() && HTMLWidget)
511 switch (current->id()) {
518 NodeImpl* tsection = current;
519 if (current->id() == ID_TR)
520 tsection = current->parent();
521 else if (current->id() == ID_TD || current->id() == ID_TH)
522 tsection = current->parent()->parent();
523 NodeImpl* table = tsection->parent();
524 int exceptioncode = 0;
525 table->insertBefore(n, tsection, exceptioncode);
526 pushBlock(id, tagPriority[id]);
528 inStrayTableContent++;
529 blockStack->strayTableContent = true;
541 if (isTableRelatedTag(current->id())) {
542 while (current->id() != ID_TABLE && isTableRelatedTag(current->id()))
544 return insertNode(n);
551 // switch on the currently active element
552 switch(current->id())
567 head = new HTMLHeadElementImpl(document);
574 TextImpl *t = static_cast<TextImpl *>(n);
575 if (t->containsOnlyWhitespace())
577 /* Fall through to default */
580 if ( haveFrameSet ) break;
581 e = new HTMLBodyElementImpl(document);
589 // we can get here only if the element is not allowed in head.
593 // This means the body starts here...
594 if ( haveFrameSet ) break;
596 e = new HTMLBodyElementImpl(document);
605 // Illegal content in a caption. Close the caption and try again.
606 popBlock(ID_CAPTION);
614 return insertNode(n, flat);
625 popBlock(ID_TABLE); // end the table
626 handled = true; // ...and start a new one
630 TextImpl *t = static_cast<TextImpl *>(n);
631 if (t->containsOnlyWhitespace())
633 DOMStringImpl *i = t->string();
634 unsigned int pos = 0;
635 while(pos < i->l && ( *(i->s+pos) == QChar(' ') ||
636 *(i->s+pos) == QChar(0xa0))) pos++;
642 NodeImpl *node = current;
643 NodeImpl *parent = node->parentNode();
645 NodeImpl *parentparent = parent->parentNode();
647 if (n->isTextNode() ||
648 ( node->id() == ID_TR &&
649 ( parent->id() == ID_THEAD ||
650 parent->id() == ID_TBODY ||
651 parent->id() == ID_TFOOT ) && parentparent->id() == ID_TABLE ) ||
652 ( !checkChild( ID_TR, id ) && ( node->id() == ID_THEAD || node->id() == ID_TBODY || node->id() == ID_TFOOT ) &&
653 parent->id() == ID_TABLE ))
655 node = (node->id() == ID_TABLE) ? node :
656 ((node->id() == ID_TR) ? parentparent : parent);
657 NodeImpl *parent = node->parentNode();
658 int exceptioncode = 0;
659 parent->insertBefore( n, node, exceptioncode );
660 if ( exceptioncode ) {
662 kdDebug( 6035 ) << "adding content before table failed!" << endl;
666 if (n->isElementNode() && tagPriority[id] != 0 &&
667 !flat && endTag[id] != DOM::FORBIDDEN)
669 pushBlock(id, tagPriority[id]);
671 inStrayTableContent++;
672 blockStack->strayTableContent = true;
677 if ( current->id() == ID_TR )
678 e = new HTMLTableCellElementImpl(document, ID_TD);
679 else if ( current->id() == ID_TABLE )
680 e = new HTMLTableSectionElementImpl( document, ID_TBODY, true /* implicit */ );
682 e = new HTMLTableRowElementImpl( document );
691 discard_until = ID_OBJECT + ID_CLOSE_TAG;
697 e = new HTMLDivElementImpl(document);
722 popBlock(current->id());
728 if (id == ID_OPTGROUP)
730 popBlock(current->id());
733 else if(id == ID_SELECT)
735 // IE treats a nested select as </select>. Let's do the same
736 popBlock( ID_SELECT );
740 // head elements in the body should be ignored.
742 popBlock(ID_ADDRESS);
747 popBlock(ID_COLGROUP);
756 if(current->isDocumentNode())
758 if(current->firstChild() == 0) {
759 e = new HTMLHtmlElementImpl(document);
764 else if(current->isInline())
771 // if we couldn't handle the error, just rethrow the exception...
774 //kdDebug( 6035 ) << "Exception handler failed in HTMLPArser::insertNode()" << endl;
778 return insertNode(n);
782 NodeImpl *KHTMLParser::getElement(Token* t)
787 if (!head && current->id() == ID_HTML) {
788 head = new HTMLHeadElementImpl(document);
793 // body no longer allowed if we have a frameset
798 return new HTMLBodyElementImpl(document);
803 if (inBody && !haveFrameSet && !haveContent) {
805 // ### actually for IE document.body returns the now hidden "body" element
806 // we can't implement that behaviour now because it could cause too many
807 // regressions and the headaches are not worth the work as long as there is
808 // no site actually relying on that detail (Dirk)
810 doc()->body()->setAttribute(ATTR_STYLE, "display:none");
813 if ((haveContent || haveFrameSet) && current->id() == ID_HTML)
817 return new HTMLFrameSetElementImpl(document);
819 // a bit of a special case, since the frame is inlined
821 discard_until = ID_IFRAME + ID_CLOSE_TAG;
826 // Only create a new form if we're not already inside one.
827 // This is consistent with other browsers' behavior.
830 form = new HTMLFormElementImpl(document);
833 NodeImpl *n = handleIsindex(t);
845 return new HTMLKeygenElementImpl(document, form);
847 return new HTMLLegendElementImpl(document, form);
849 return new HTMLOptGroupElementImpl(document, form);
851 return new HTMLOptionElementImpl(document, form);
854 return new HTMLSelectElementImpl(document, form);
856 return new HTMLTextAreaElementImpl(document, form);
873 // Never allow nested <a>s.
879 return new HTMLImageElementImpl(document, form);
881 map = new HTMLMapElementImpl(document);
901 // elements with no special representation in the DOM
910 if (!allowNestedRedundantTag(t->id))
916 popBlock(t->id); // Don't allow nested <nobr> or <wbr>
919 // these are special, and normally not rendered
921 discard_until = ID_NOEMBED + ID_CLOSE_TAG;
924 discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
927 if (HTMLWidget && HTMLWidget->part()->jScriptEnabled())
928 discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
931 //discard_until = ID_NOLAYER + ID_CLOSE_TAG;
934 return new TextImpl(document, t->text);
936 if (!includesCommentsInDOM)
941 return document->document()->createHTMLElement(t->id);
944 #define MAX_REDUNDANT 20
946 bool KHTMLParser::allowNestedRedundantTag(int _id)
948 // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
949 // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20
950 // nested tags of the same type before just ignoring them all together.
952 for (HTMLStackElem* curr = blockStack;
953 i < MAX_REDUNDANT && curr && curr->id == _id;
954 curr = curr->next, i++);
955 return i != MAX_REDUNDANT;
958 void KHTMLParser::processCloseTag(Token *t)
960 // support for really broken html. Can't believe I'm supporting such crap (lars)
963 case ID_HTML+ID_CLOSE_TAG:
964 case ID_BODY+ID_CLOSE_TAG:
965 // we never close the body tag, since some stupid web pages close it before the actual end of the doc.
966 // let's rely on the end() call to close things.
968 case ID_FORM+ID_CLOSE_TAG:
970 // this one is to get the right style on the body element
972 case ID_MAP+ID_CLOSE_TAG:
975 case ID_SELECT+ID_CLOSE_TAG:
983 kdDebug( 6035 ) << "added the following childs to " << current->nodeName().string() << endl;
984 NodeImpl *child = current->firstChild();
987 kdDebug( 6035 ) << " " << child->nodeName().string() << endl;
988 child = child->nextSibling();
991 HTMLStackElem* oldElem = blockStack;
992 popBlock(t->id-ID_CLOSE_TAG);
993 if (oldElem == blockStack && t->id == ID_P+ID_CLOSE_TAG) {
994 // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat
995 // this as a valid break, i.e., <p></p>. So go ahead and make the empty
1002 kdDebug( 6035 ) << "closeTag --> current = " << current->nodeName().string() << endl;
1006 bool KHTMLParser::isHeaderTag(int _id)
1021 void KHTMLParser::popNestedHeaderTag()
1023 // This function only cares about checking for nested headers that have only inlines in between them.
1024 NodeImpl* currNode = current;
1025 for (HTMLStackElem* curr = blockStack; curr; curr = curr->next) {
1026 if (isHeaderTag(curr->id)) {
1030 if (currNode && !currNode->isInline())
1032 currNode = curr->node;
1036 bool KHTMLParser::isResidualStyleTag(int _id)
1062 bool KHTMLParser::isAffectedByResidualStyle(int _id)
1064 if (isResidualStyleTag(_id))
1092 void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
1094 // Find the element that crosses over to a higher level. For now, if there is more than
1095 // one, we will just give up and not attempt any sort of correction. It's highly unlikely that
1096 // there will be more than one, since <p> tags aren't allowed to be nested.
1097 int exceptionCode = 0;
1098 HTMLStackElem* curr = blockStack;
1099 HTMLStackElem* maxElem = 0;
1100 HTMLStackElem* prev = 0;
1101 HTMLStackElem* prevMaxElem = 0;
1102 while (curr && curr != elem) {
1103 if (curr->level > elem->level) {
1114 if (!curr || !maxElem || !isAffectedByResidualStyle(maxElem->id)) return;
1116 NodeImpl* residualElem = prev->node;
1117 NodeImpl* blockElem = prevMaxElem ? prevMaxElem->node : current;
1118 NodeImpl* parentElem = elem->node;
1120 // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1121 // FIXME: We should either always allow it or perform an additional fixup instead of
1122 // just bailing here.
1123 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1124 if (!parentElem->childAllowed(blockElem))
1127 if (maxElem->node->parentNode() != elem->node) {
1128 // Walk the stack and remove any elements that aren't residual style tags. These
1129 // are basically just being closed up. Example:
1130 // <font><span>Moo<p>Goo</font></p>.
1131 // In the above example, the <span> doesn't need to be reopened. It can just close.
1132 HTMLStackElem* currElem = maxElem->next;
1133 HTMLStackElem* prevElem = maxElem;
1134 while (currElem != elem) {
1135 HTMLStackElem* nextElem = currElem->next;
1136 if (!isResidualStyleTag(currElem->id)) {
1137 prevElem->next = nextElem;
1138 prevElem->node = currElem->node;
1142 prevElem = currElem;
1143 currElem = nextElem;
1146 // We have to reopen residual tags in between maxElem and elem. An example of this case is:
1147 // <font><i>Moo<p>Foo</font>.
1148 // In this case, we need to transform the part before the <p> into:
1149 // <font><i>Moo</i></font><i>
1150 // so that the <i> will remain open. This involves the modification of elements
1151 // in the block stack.
1152 // This will also affect how we ultimately reparent the block, since we want it to end up
1153 // under the reopened residual tags (e.g., the <i> in the above example.)
1154 NodeImpl* prevNode = 0;
1155 NodeImpl* currNode = 0;
1157 while (currElem->node != residualElem) {
1158 if (isResidualStyleTag(currElem->node->id())) {
1159 // Create a clone of this element.
1160 currNode = currElem->node->cloneNode(false);
1162 // Change the stack element's node to point to the clone.
1163 currElem->node = currNode;
1165 // Attach the previous node as a child of this new node.
1167 currNode->appendChild(prevNode, exceptionCode);
1168 else // The new parent for the block element is going to be the innermost clone.
1169 parentElem = currNode;
1171 prevNode = currNode;
1174 currElem = currElem->next;
1177 // Now append the chain of new residual style elements if one exists.
1179 elem->node->appendChild(prevNode, exceptionCode);
1182 // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1183 // All content of |blockElem| is reparented to be under this clone. We then
1184 // reparent |blockElem| using real DOM calls so that attachment/detachment will
1185 // be performed to fix up the rendering tree.
1186 // So for this example: <b>...<p>Foo</b>Goo</p>
1187 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1189 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
1190 blockElem->parentNode()->removeChild(blockElem, exceptionCode);
1192 // Step 2: Clone |residualElem|.
1193 NodeImpl* newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1195 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
1196 // before we've put |newNode| into the document. That way we'll only do one attachment of all
1197 // the new content (instead of a bunch of individual attachments).
1198 NodeImpl* currNode = blockElem->firstChild();
1200 NodeImpl* nextNode = currNode->nextSibling();
1201 blockElem->removeChild(currNode, exceptionCode);
1202 newNode->appendChild(currNode, exceptionCode);
1203 currNode = nextNode;
1206 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
1207 // attachment can occur yet.
1208 blockElem->appendChild(newNode, exceptionCode);
1210 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
1211 parentElem->appendChild(blockElem, exceptionCode);
1213 // Step 6: Elide |elem|, since it is effectively no longer open. Also update
1214 // the node associated with the previous stack element so that when it gets popped,
1215 // it doesn't make the residual element the next current node.
1216 HTMLStackElem* currElem = maxElem;
1217 HTMLStackElem* prevElem = 0;
1218 while (currElem != elem) {
1219 prevElem = currElem;
1220 currElem = currElem->next;
1222 prevElem->next = elem->next;
1223 prevElem->node = elem->node;
1226 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1227 // In the above example, Goo should stay italic.
1229 HTMLStackElem* residualStyleStack = 0;
1230 while (curr && curr != maxElem) {
1231 // We will actually schedule this tag for reopening
1232 // after we complete the close of this entire block.
1233 NodeImpl* currNode = current;
1234 if (isResidualStyleTag(curr->id)) {
1235 // We've overloaded the use of stack elements and are just reusing the
1236 // struct with a slightly different meaning to the variables. Instead of chaining
1237 // from innermost to outermost, we build up a list of all the tags we need to reopen
1238 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1239 // to the outermost tag we need to reopen.
1240 // We also set curr->node to be the actual element that corresponds to the ID stored in
1241 // curr->id rather than the node that you should pop to when the element gets pulled off
1244 curr->node = currNode;
1245 curr->next = residualStyleStack;
1246 residualStyleStack = curr;
1254 reopenResidualStyleTags(residualStyleStack, 0); // FIXME: Deal with stray table content some day
1255 // if it becomes necessary to do so.
1258 void KHTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, DOM::NodeImpl* malformedTableParent)
1260 // Loop for each tag that needs to be reopened.
1262 // Create a shallow clone of the DOM node for this element.
1263 NodeImpl* newNode = elem->node->cloneNode(false);
1265 // Append the new node. In the malformed table case, we need to insert before the table,
1266 // which will be the last child.
1267 int exceptionCode = 0;
1268 if (malformedTableParent)
1269 malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), exceptionCode);
1271 current->appendChild(newNode, exceptionCode);
1272 // FIXME: Is it really OK to ignore the exceptions here?
1274 // Now push a new stack element for this node we just created.
1275 pushBlock(elem->id, elem->level);
1277 // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1278 // that it is inside a malformed table.
1279 blockStack->strayTableContent = malformedTableParent != 0;
1280 if (blockStack->strayTableContent)
1281 inStrayTableContent++;
1283 // Clear our malformed table parent variable.
1284 malformedTableParent = 0;
1286 // Update |current| manually to point to the new node.
1287 setCurrent(newNode);
1289 // Advance to the next tag that needs to be reopened.
1290 HTMLStackElem* next = elem->next;
1296 void KHTMLParser::pushBlock(int _id, int _level)
1298 HTMLStackElem *Elem = new HTMLStackElem(_id, _level, current, blockStack);
1301 addForbidden(_id, forbiddenTag);
1304 void KHTMLParser::popBlock( int _id )
1306 HTMLStackElem *Elem = blockStack;
1311 kdDebug( 6035 ) << "popBlock(" << getTagName(_id).string() << ")" << endl;
1313 kdDebug( 6035) << " > " << getTagName(Elem->id).string() << endl;
1319 while( Elem && (Elem->id != _id))
1321 if (maxLevel < Elem->level)
1323 maxLevel = Elem->level;
1331 if (maxLevel > Elem->level) {
1332 // We didn't match because the tag is in a different scope, e.g.,
1333 // <b><p>Foo</b>. Try to correct the problem.
1334 if (!isResidualStyleTag(_id))
1336 return handleResidualStyleCloseTagAcrossBlocks(Elem);
1339 bool isAffectedByStyle = isAffectedByResidualStyle(Elem->id);
1340 HTMLStackElem* residualStyleStack = 0;
1341 NodeImpl* malformedTableParent = 0;
1346 if (Elem->id == _id)
1348 int strayTable = inStrayTableContent;
1352 // This element was the root of some malformed content just inside an implicit or
1353 // explicit <tbody> or <tr>.
1354 // If we end up needing to reopen residual style tags, the root of the reopened chain
1355 // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1356 if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
1357 NodeImpl* curr = current;
1358 while (curr && curr->id() != ID_TABLE)
1359 curr = curr->parentNode();
1360 malformedTableParent = curr ? curr->parentNode() : 0;
1365 if (Elem->id == ID_FORM && form)
1366 // A <form> is being closed prematurely (and this is
1367 // malformed HTML). Set an attribute on the form to clear out its
1369 form->setMalformed(true);
1371 // Schedule this tag for reopening
1372 // after we complete the close of this entire block.
1373 NodeImpl* currNode = current;
1374 if (isAffectedByStyle && isResidualStyleTag(Elem->id)) {
1375 // We've overloaded the use of stack elements and are just reusing the
1376 // struct with a slightly different meaning to the variables. Instead of chaining
1377 // from innermost to outermost, we build up a list of all the tags we need to reopen
1378 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1379 // to the outermost tag we need to reopen.
1380 // We also set Elem->node to be the actual element that corresponds to the ID stored in
1381 // Elem->id rather than the node that you should pop to when the element gets pulled off
1384 Elem->next = residualStyleStack;
1385 Elem->node = currNode;
1386 residualStyleStack = Elem;
1394 reopenResidualStyleTags(residualStyleStack, malformedTableParent);
1397 void KHTMLParser::popOneBlock(bool delBlock)
1399 HTMLStackElem *Elem = blockStack;
1401 // we should never get here, but some bad html might cause it.
1402 #ifndef PARSER_DEBUG
1405 kdDebug( 6035 ) << "popping block: " << getTagName(Elem->id).string() << "(" << Elem->id << ")" << endl;
1409 if((Elem->node != current)) {
1410 if (current->maintainsState() && doc()){
1411 doc()->registerMaintainsState(current);
1412 QStringList &states = doc()->restoreState();
1413 if (!states.isEmpty())
1414 current->restoreState(states);
1419 removeForbidden(Elem->id, forbiddenTag);
1421 blockStack = Elem->next;
1422 setCurrent(Elem->node);
1424 if (Elem->strayTableContent)
1425 inStrayTableContent--;
1431 void KHTMLParser::popInlineBlocks()
1433 while(current->isInline())
1437 void KHTMLParser::freeBlock()
1443 void KHTMLParser::createHead()
1445 if(head || !doc()->firstChild())
1448 head = new HTMLHeadElementImpl(document);
1449 HTMLElementImpl *body = doc()->body();
1450 int exceptioncode = 0;
1451 doc()->firstChild()->insertBefore(head, body, exceptioncode);
1452 if ( exceptioncode ) {
1454 kdDebug( 6035 ) << "creation of head failed!!!!" << endl;
1460 NodeImpl *KHTMLParser::handleIsindex( Token *t )
1463 HTMLFormElementImpl *myform = form;
1465 myform = new HTMLFormElementImpl(document);
1468 n = new HTMLDivElementImpl( document );
1469 NodeImpl *child = new HTMLHRElementImpl( document );
1470 n->addChild( child );
1471 AttributeImpl* a = t->attrs ? t->attrs->getAttributeItem(ATTR_PROMPT) : 0;
1473 DOMString text = searchableIndexIntroduction();
1475 DOMString text = i18n("This is a searchable index. Enter search keywords: ");
1478 text = DOMString(a->value()) + " ";
1479 child = new TextImpl(document, text);
1480 n->addChild( child );
1481 child = new HTMLIsIndexElementImpl(document, myform);
1482 static_cast<ElementImpl *>(child)->setAttribute(ATTR_TYPE, "khtml_isindex");
1483 n->addChild( child );
1484 child = new HTMLHRElementImpl( document );
1485 n->addChild( child );
1490 void KHTMLParser::startBody()
1497 insertNode( isindex, true /* don't decend into this node */ );
1502 void KHTMLParser::finished()
1504 // This ensures that "current" is not left pointing to a node when the document is destroyed.