Reviewed by Hyatt
author kocienda <kocienda@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 9 Nov 2004 00:32:21 +0000 (00:32 +0000)
committer kocienda <kocienda@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 9 Nov 2004 00:32:21 +0000 (00:32 +0000)
        * khtml/html/html_elementimpl.cpp:
        (HTMLElementImpl::createContextualFragment): Now takes flag to control whether comments
        are added to the DOM.
        * khtml/html/html_elementimpl.h: Ditto.
        * khtml/html/htmlparser.cpp:
        (KHTMLParser::KHTMLParser): Ditto.
        (KHTMLParser::getElement): Remove ifdef for comment processing. Replace with flag check.
        * khtml/html/htmlparser.h: Add flag to constructor so callers can request comment nodes.
        * khtml/html/htmltokenizer.cpp:
        (khtml::HTMLTokenizer::HTMLTokenizer): Add flag to constructor so callers can request comment nodes.
        (khtml::HTMLTokenizer::parseComment): Fix code to handle parsing out comment text correctly.
        There were a couple of indexing errors that resulted in the comment text containing part of the
        comment markers.
        (khtml::HTMLTokenizer::processToken): Don't let token id be reset to ID_TEXT if token is a comment.
        * khtml/html/htmltokenizer.h:  Add flag to constructor so callers can request comment nodes.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@7958 268f45cc-cd09-0410-ab3c-d52691b4dbfc

WebCore/ChangeLog-2005-08-23
WebCore/khtml/html/html_elementimpl.cpp
WebCore/khtml/html/html_elementimpl.h
WebCore/khtml/html/htmlparser.cpp
WebCore/khtml/html/htmlparser.h
WebCore/khtml/html/htmltokenizer.cpp
WebCore/khtml/html/htmltokenizer.h

index 92218612ebc0b704637a095d29e5769fd31864da..6bd771e4728e2846e9b0514839124706700beb93 100644 (file)
@@ -1,3 +1,23 @@
+2004-11-08  Ken Kocienda  <kocienda@apple.com>
+
+        Reviewed by Hyatt
+
+        * khtml/html/html_elementimpl.cpp:
+        (HTMLElementImpl::createContextualFragment): Now takes flag to control whether comments
+        are added to the DOM.
+        * khtml/html/html_elementimpl.h: Ditto.
+        * khtml/html/htmlparser.cpp:
+        (KHTMLParser::KHTMLParser): Ditto.
+        (KHTMLParser::getElement): Remove ifdef for comment processing. Replace with flag check.
+        * khtml/html/htmlparser.h: Add flag to constructor so callers can request comment nodes.
+        * khtml/html/htmltokenizer.cpp:
+        (khtml::HTMLTokenizer::HTMLTokenizer): Add flag to constructor so callers can request comment nodes.
+        (khtml::HTMLTokenizer::parseComment): Fix code to handle parsing out comment text correctly.
+        There were a couple of indexing errors that resulted in the comment text containing part of the 
+        comment markers.
+        (khtml::HTMLTokenizer::processToken): Don't let token id be reset to ID_TEXT if token is a comment.
+        * khtml/html/htmltokenizer.h:  Add flag to constructor so callers can request comment nodes.
+
 2004-11-08  Chris Blumenberg  <cblu@apple.com>
 
        Fixed: <rdar://problem/3870907> WebCore unnecessary links against JavaVM and Security
index 62dfcde96854c33bb371b384ce328530d787948d..a4f3fdb198e1718f205c5e3c522729eefba63d19 100644 (file)
@@ -726,7 +726,7 @@ DOMString HTMLElementImpl::outerText() const
     return innerText();
 }
 
-DocumentFragmentImpl *HTMLElementImpl::createContextualFragment( const DOMString &html )
+DocumentFragmentImpl *HTMLElementImpl::createContextualFragment(const DOMString &html, bool includeCommentsInDOM)
 {
     // the following is in accordance with the definition as used by IE
     if( endTag[id()] == FORBIDDEN )
@@ -751,7 +751,7 @@ DocumentFragmentImpl *HTMLElementImpl::createContextualFragment( const DOMString
     DocumentFragmentImpl *fragment = new DocumentFragmentImpl( docPtr() );
     fragment->ref();
     {
-        HTMLTokenizer tok( docPtr(), fragment );
+        HTMLTokenizer tok(docPtr(), fragment, includeCommentsInDOM);
         tok.write( html.string(), true );
         tok.finish();
     }
index 2982b871267ac3b30800eeebc44f56817bcffc24..9607e149cd9e00b903265a6a160a4d495db13bed 100644 (file)
@@ -152,7 +152,7 @@ public:
     DOMString outerHTML() const;
     DOMString innerText() const;
     DOMString outerText() const;
-    DocumentFragmentImpl *createContextualFragment( const DOMString &html );
+    DocumentFragmentImpl *createContextualFragment(const DOMString &html, bool includeCommentsInDOM=false);
     bool setInnerHTML( const DOMString &html );
     bool setOuterHTML( const DOMString &html );
     bool setInnerText( const DOMString &text );
index 384d85f4e41dba1c18c80b7cd7127202be4261f9..9b2b2c828bc43c57c47b1f46847cacf64488b929 100644 (file)
@@ -112,8 +112,8 @@ public:
  *    element or ignore the tag.
  *
  */
-KHTMLParser::KHTMLParser( KHTMLView *_parent, DocumentPtr *doc)
-    : current(0), currentIsReferenced(false)
+KHTMLParser::KHTMLParser(KHTMLView *_parent, DocumentPtr *doc, bool includesComments)
+    : current(0), currentIsReferenced(false), includesCommentsInDOM(includesComments)
 {
     //kdDebug( 6035 ) << "parser constructor" << endl;
 #if SPEED_DEBUG > 0
@@ -132,8 +132,8 @@ KHTMLParser::KHTMLParser( KHTMLView *_parent, DocumentPtr *doc)
     reset();
 }
 
-KHTMLParser::KHTMLParser( DOM::DocumentFragmentImpl *i, DocumentPtr *doc )
-    : current(0), currentIsReferenced(false)
+KHTMLParser::KHTMLParser(DOM::DocumentFragmentImpl *i, DocumentPtr *doc, bool includesComments)
+    : current(0), currentIsReferenced(false), includesCommentsInDOM(includesComments)
 {
     HTMLWidget = 0;
     document = doc;
@@ -1134,9 +1134,8 @@ NodeImpl *KHTMLParser::getElement(Token* t)
         n = new TextImpl(document, t->text);
         break;
     case ID_COMMENT:
-#ifdef COMMENTS_IN_DOM
-        n = new CommentImpl(document, t->text);
-#endif
+        if (includesCommentsInDOM)
+            n = new CommentImpl(document, t->text);
         break;
     default:
         kdDebug( 6035 ) << "Unknown tag " << t->id << "!" << endl;
index 4b8c6bb5fb3c98629e5a2518e0dccbb2add80c51..d392d200cbbac09f79adffb5693b316900c9aec4 100644 (file)
@@ -70,8 +70,8 @@ class KHTMLParser;
 class KHTMLParser
 {
 public:
-    KHTMLParser( KHTMLView *w, DOM::DocumentPtr *i );
-    KHTMLParser( DOM::DocumentFragmentImpl *frag, DOM::DocumentPtr *doc );
+    KHTMLParser(KHTMLView *w, DOM::DocumentPtr *i, bool includesComments=false);
+    KHTMLParser(DOM::DocumentFragmentImpl *frag, DOM::DocumentPtr *doc, bool includesComments=false);
     virtual ~KHTMLParser();
 
     /**
@@ -181,6 +181,8 @@ protected:
 
     bool headLoaded;
     int inStrayTableContent;
+
+    bool includesCommentsInDOM;
     
 #if SPEED_DEBUG > 0
     QTime qt;
index ec0205c17c8ea03d6da756243fc0ff53071ba17a..45b4357a404bf55e366b20eff0ca7e8bb874f9de 100644 (file)
@@ -224,7 +224,7 @@ inline bool tagMatch(const char *s1, const QChar *s2, uint length)
 
 // ----------------------------------------------------------------------------
 
-HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view)
+HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view, bool includesComments)
 #ifndef NDEBUG
     : inWrite(false)
 #endif
@@ -234,16 +234,17 @@ HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view)
     scriptCode = 0;
     scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
     charsets = KGlobal::charsets();
-    parser = new KHTMLParser(_view, _doc);
+    parser = new KHTMLParser(_view, _doc, includesComments);
     m_executingScript = 0;
     loadingExtScript = false;
     onHold = false;
     attrNamePresent = false;
+    includesCommentsInDOM = includesComments;
     
     begin();
 }
 
-HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, DOM::DocumentFragmentImpl *i)
+HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, DOM::DocumentFragmentImpl *i, bool includesComments)
 #ifndef NDEBUG
     : inWrite(false)
 #endif
@@ -253,10 +254,11 @@ HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, DOM::DocumentFragmentImpl *
     scriptCode = 0;
     scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
     charsets = KGlobal::charsets();
-    parser = new KHTMLParser( i, _doc );
+    parser = new KHTMLParser(i, _doc, includesComments);
     m_executingScript = 0;
     loadingExtScript = false;
     onHold = false;
+    includesCommentsInDOM = includesComments;
 
     begin();
 }
@@ -623,31 +625,36 @@ void HTMLTokenizer::parseComment(TokenizerString &src)
         qDebug("comment is now: *%s*",
                QConstString((QChar*)src.current(), QMIN(16, src.length())).string().latin1());
 #endif
-        if (src->unicode() == '>' &&
-            ( ( brokenComments && !( script || style ) ) ||
-              ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' &&
-                scriptCode[scriptCodeSize-2] == '-' ) ||
-              // Other browsers will accept --!> as a close comment, even though it's
-              // not technically valid.
-              ( scriptCodeSize > 3 && scriptCode[scriptCodeSize-4] == '-' &&
-                scriptCode[scriptCodeSize-3] == '-' &&
-                scriptCode[scriptCodeSize-2] == '!' ) ) ) {
-            ++src;
-            if ( !( script || xmp || textarea || style) ) {
-#ifdef COMMENTS_IN_DOM
-                checkScriptBuffer();
-                scriptCode[ scriptCodeSize ] = 0;
-                scriptCode[ scriptCodeSize + 1 ] = 0;
-                currToken.id = ID_COMMENT;
-                processListing(TokenizerString(scriptCode, scriptCodeSize - 2));
-                processToken();
-                currToken.id = ID_COMMENT + ID_CLOSE_TAG;
-                processToken();
-#endif
-                scriptCodeSize = 0;
+        if (src->unicode() == '>') {
+            bool handleBrokenComments = brokenComments && !(script || style);
+            int endCharsCount = 1; // start off with one for the '>' character
+            if (scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' && scriptCode[scriptCodeSize-2] == '-') {
+                endCharsCount = 3;
+            }
+            else if (scriptCodeSize > 3 && scriptCode[scriptCodeSize-4] == '-' && scriptCode[scriptCodeSize-3] == '-' && 
+                scriptCode[scriptCodeSize-2] == '!') {
+                // Other browsers will accept --!> as a close comment, even though it's
+                // not technically valid.
+                endCharsCount = 4;
+            }
+            if (handleBrokenComments || endCharsCount > 1) {
+                ++src;
+                if (!( script || xmp || textarea || style)) {
+                    if (includesCommentsInDOM) {
+                        checkScriptBuffer();
+                        scriptCode[ scriptCodeSize ] = 0;
+                        scriptCode[ scriptCodeSize + 1 ] = 0;
+                        currToken.id = ID_COMMENT;
+                        processListing(TokenizerString(scriptCode, scriptCodeSize - endCharsCount));
+                        processToken();
+                        currToken.id = ID_COMMENT + ID_CLOSE_TAG;
+                        processToken();
+                    }
+                    scriptCodeSize = 0;
+                }
+                comment = false;
+                return; // Finished parsing comment
             }
-            comment = false;
-            return; // Finished parsing comment
         }
         ++src;
     }
@@ -1822,7 +1829,8 @@ void HTMLTokenizer::processToken()
 #endif
         currToken.text = new DOMStringImpl( buffer, dest - buffer );
         currToken.text->ref();
-        currToken.id = ID_TEXT;
+        if (currToken.id != ID_COMMENT)
+            currToken.id = ID_TEXT;
     }
     else if(!currToken.id) {
         currToken.reset();
index c427d9ad21705b7133883db6e1ba6d664a80394f..52b2aff62af0dfde763089563f0cd268890e6d31 100644 (file)
@@ -124,8 +124,8 @@ public:
 class HTMLTokenizer : public Tokenizer, public CachedObjectClient
 {
 public:
-    HTMLTokenizer(DOM::DocumentPtr *, KHTMLView * = 0);
-    HTMLTokenizer(DOM::DocumentPtr *, DOM::DocumentFragmentImpl *frag);
+    HTMLTokenizer(DOM::DocumentPtr *, KHTMLView * = 0, bool includesComments=false);
+    HTMLTokenizer(DOM::DocumentPtr *, DOM::DocumentFragmentImpl *frag, bool includesComments=false);
     virtual ~HTMLTokenizer();
 
     virtual void write(const TokenizerString &str, bool appendData);
@@ -344,6 +344,8 @@ protected:
     int scriptStartLineno;
     int tagStartLineno;
 
+    bool includesCommentsInDOM;
+
 // This buffer can hold arbitrarily long user-defined attribute names, such as in EMBED tags.
 // So any fixed number might be too small, but rather than rewriting all usage of this buffer
 // we'll just make it large enough to handle all imaginable cases.