2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
27 #include "XMLDocumentParser.h"
29 #include "CDATASection.h"
30 #include "CachedScript.h"
32 #include "CachedResourceLoader.h"
34 #include "DocumentFragment.h"
35 #include "DocumentType.h"
37 #include "FrameLoader.h"
38 #include "FrameView.h"
39 #include "HTMLEntityParser.h"
40 #include "HTMLHtmlElement.h"
41 #include "HTMLLinkElement.h"
42 #include "HTMLNames.h"
43 #include "HTMLStyleElement.h"
44 #include "ProcessingInstruction.h"
45 #include "ResourceError.h"
46 #include "ResourceHandle.h"
47 #include "ResourceRequest.h"
48 #include "ResourceResponse.h"
49 #include "ScriptableDocumentParser.h"
50 #include "ScriptController.h"
51 #include "ScriptElement.h"
52 #include "ScriptSourceCode.h"
53 #include "ScriptValue.h"
54 #include "TextResourceDecoder.h"
55 #include "TransformSource.h"
57 #include <wtf/StringExtras.h>
58 #include <wtf/Threading.h>
59 #include <wtf/Vector.h>
60 #include <wtf/text/CString.h>
63 #include "HTMLNames.h"
64 #include "HTMLScriptElement.h"
// Entity resolver type that the XMLDocumentParser constructors install on
// m_stream via setEntityResolver().
// NOTE(review): the end of this class declaration is not visible in this excerpt.
71 class EntityResolver : public QXmlStreamEntityResolver {
// Resolution hook for entity names; defined out of line below.
72 virtual QString resolveUndeclaredEntity(const QString &name);
// Converts the entity name to UTF-8 and passes it to decodeNamedEntity().
// NOTE(review): the statement that turns `c` into the returned QString is not
// visible in this excerpt (embedded numbering skips 78) - confirm against the full file.
75 QString EntityResolver::resolveUndeclaredEntity(const QString &name)
77 UChar c = decodeNamedEntity(name.toUtf8().constData());
81 // --------------------------------
83 bool XMLDocumentParser::supportsXMLVersion(const String& version)
85 return version == "1.0";
// Constructs a parser for a whole document. The document itself is the initial
// current node, m_parsingFragment is false and fragment scripting is allowed.
// NOTE(review): the embedded numbering has gaps (90-91, 93, 97, 100, 104-105, 107, 111),
// so some member initializers - including whatever consumes frameView - are not
// visible in this excerpt.
88 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
89 : ScriptableDocumentParser(document)
92 , m_currentNode(document)
94 , m_sawXSLTransform(false)
95 , m_sawFirstElement(false)
96 , m_isXHTMLDocument(false)
98 , m_isXHTMLMPDocument(false)
99 , m_hasDocTypeDeclaration(false)
101 , m_parserPaused(false)
102 , m_requestingScript(false)
103 , m_finishCalled(false)
106 , m_lastErrorColumn(0)
108 , m_scriptStartPosition(TextPosition1::belowRangePosition())
109 , m_parsingFragment(false)
110 , m_scriptingPermission(FragmentScriptingAllowed)
// The resolver is heap-allocated here; the destructor deletes m_stream.entityResolver().
112 m_stream.setEntityResolver(new EntityResolver);
// Constructs a parser for a document fragment. The fragment is the initial
// current node, m_parsingFragment is true and the caller's scripting permission
// is stored. Namespace declarations gathered from the context element chain are
// passed to m_stream as extra namespace declarations.
// NOTE(review): the embedded numbering has gaps, so some member initializers and
// statements (e.g. the bodies of the two bare `if` tests below) are not visible here.
115 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission permission)
116 : ScriptableDocumentParser(fragment->document())
119 , m_currentNode(fragment)
121 , m_sawXSLTransform(false)
122 , m_sawFirstElement(false)
123 , m_isXHTMLDocument(false)
125 , m_isXHTMLMPDocument(false)
126 , m_hasDocTypeDeclaration(false)
128 , m_parserPaused(false)
129 , m_requestingScript(false)
130 , m_finishCalled(false)
133 , m_lastErrorColumn(0)
135 , m_scriptStartPosition(TextPosition1::belowRangePosition())
136 , m_parsingFragment(true)
137 , m_scriptingPermission(permission)
141 // Add namespaces based on the parent node
142 Vector<Element*> elemStack;
// Walk upwards from parentElement, appending each element visited, so the
// innermost element is first in elemStack and the outermost is last.
143 while (parentElement) {
144 elemStack.append(parentElement);
146 Node* n = parentElement->parentNode();
147 if (!n || !n->isElementNode())
149 parentElement = static_cast<Element*>(n);
152 if (elemStack.isEmpty())
155 QXmlStreamNamespaceDeclarations namespaces;
// NOTE(review): `element` is assigned only in the for-initializer (elemStack.last()).
// The loop step removes the last stack entry, but no visible line re-reads
// elemStack.last(), so every iteration appears to inspect the same element's
// attributes. Confirm against the full file.
156 for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
157 if (NamedNodeMap* attrs = element->attributes()) {
158 for (unsigned i = 0; i < attrs->length(); i++) {
159 Attribute* attr = attrs->attributeItem(i);
// An attribute whose local name is "xmlns" sets the default namespace URI; an
// attribute whose prefix is "xmlns" becomes a (prefix, URI) declaration.
160 if (attr->localName() == "xmlns")
161 m_defaultNamespaceURI = attr->value();
162 else if (attr->prefix() == "xmlns")
163 namespaces.append(QXmlStreamNamespaceDeclaration(attr->localName(), attr->value()));
167 m_stream.addExtraNamespaceDeclarations(namespaces);
// The resolver is heap-allocated here; the destructor deletes m_stream.entityResolver().
168 m_stream.setEntityResolver(new EntityResolver);
170 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
// NOTE(review): parentElement at this point is whatever the ancestor walk above
// left it pointing at, not necessarily the element the caller passed in - confirm intent.
171 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
172 m_defaultNamespaceURI = parentElement->namespaceURI();
// Tears the parser down: clears the current-node stack, unregisters this object
// as a client of the pending script, and deletes the entity resolver that the
// constructors allocated.
// NOTE(review): removeClient() is invoked through m_pendingScript; any null guard
// would be on embedded line 178, which is not visible in this excerpt - confirm.
175 XMLDocumentParser::~XMLDocumentParser()
177 clearCurrentNodeStack();
179 m_pendingScript->removeClient(this);
180 delete m_stream.entityResolver();
// Feeds a chunk of decoded source text to the stream reader.
// A decoder error is reported through handleError() as a fatal "Encoding error";
// otherwise non-empty data is appended to m_stream.
// NOTE(review): several lines of this function are not visible in this excerpt
// (embedded numbering skips 184-186, 190-192 and 196 onwards), including whatever
// follows addData() - confirm against the full file.
183 void XMLDocumentParser::doWrite(const String& parseString)
187 if (document()->decoder() && document()->decoder()->sawError()) {
188 // If the decoder saw an error, report it as fatal (stops parsing)
189 handleError(fatal, "Encoding error", lineNumber(), columnNumber());
// Convert the incoming String to a QString before handing it to the stream.
193 QString data(parseString);
194 if (!data.isEmpty()) {
195 m_stream.addData(data);
// Resets per-parse state: calls DocumentParser::startParsing() and clears the
// "saw XSL transform" and "saw first element" flags. The const char* argument
// is unnamed and unused in the visible lines.
// NOTE(review): embedded line 205 is not visible in this excerpt.
202 void XMLDocumentParser::initializeParserContext(const char*)
204 DocumentParser::startParsing();
206 m_sawXSLTransform = false;
207 m_sawFirstElement = false;
// End-of-input handling.
// If an XSL transform was seen, the original source is installed as the
// document's transform source, the parsing flag is toggled off and back on around
// a style-selector update, and parsing is stopped. A fatal error is then reported
// when the stream ended prematurely, or when text was written but no first
// element, XSL transform or earlier error was seen.
// NOTE(review): embedded lines 211-212 and 219-221 are not visible; they may carry
// preprocessor conditionals and block terminators - confirm against the full file.
210 void XMLDocumentParser::doEnd()
213 if (m_sawXSLTransform) {
214 document()->setTransformSource(new TransformSource(m_originalSourceForTransform));
215 document()->setParsing(false); // Make the doc think it's done, so it will apply xsl sheets.
216 document()->styleSelectorChanged(RecalcStyleImmediately);
217 document()->setParsing(true);
218 DocumentParser::stopParsing();
222 if (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError
223 || (m_wroteText && !m_sawFirstElement && !m_sawXSLTransform && !m_sawError))
224 handleError(fatal, qPrintable(m_stream.errorString()), lineNumber(), columnNumber());
227 int XMLDocumentParser::lineNumber() const
229 return m_stream.lineNumber();
232 int XMLDocumentParser::columnNumber() const
234 return m_stream.columnNumber();
237 TextPosition0 XMLDocumentParser::textPosition() const
239 return TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(lineNumber()), WTF::ZeroBasedNumber::fromZeroBasedInt(columnNumber()));
242 // This method incorrectly reinterprets zero-base lineNumber method as one-based number.
243 // FIXME: This error is kept for compatibility. We should fix it eventually.
244 TextPosition1 XMLDocumentParser::textPositionOneBased() const
246 return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(lineNumber()), WTF::OneBasedNumber::fromOneBasedInt(columnNumber()));
249 void XMLDocumentParser::stopParsing()
251 ScriptableDocumentParser::stopParsing();
// Resumes a paused parser. Asserts that the parser is paused, clears the paused
// flag, takes a copy of the pending source and clears m_pendingSrc, then tests
// whether finish() was called with nothing left outstanding.
// NOTE(review): the statements carrying out each of the three commented steps are
// not visible in this excerpt (embedded numbering skips 261-264, 268-269 and
// 273-274) - confirm against the full file.
254 void XMLDocumentParser::resumeParsing()
256 ASSERT(m_parserPaused);
258 m_parserPaused = false;
260 // First, execute any pending callbacks
265 // Then, write any pending data
// Copy the pending source before clearing it.
266 SegmentedString rest = m_pendingSrc;
267 m_pendingSrc.clear();
270 // Finally, if finish() has been called and append() didn't result
271 // in any further callbacks being queued, call end()
272 if (m_finishCalled && !m_parserPaused && !m_pendingScript)
// Parses fragment source by bracketing it with a synthetic
// <qxmlstreamdummyelement> start and end tag. Must be called before any element
// has been seen (asserted).
// NOTE(review): the statement that appends `source` itself and the return
// statement are not visible in this excerpt (embedded numbering skips 280 and 282).
276 bool XMLDocumentParser::appendFragmentSource(const String& source)
278 ASSERT(!m_sawFirstElement);
279 append(String("<qxmlstreamdummyelement>"));
281 append(String("</qxmlstreamdummyelement>"));
285 // --------------------------------
// Scratch state filled in by attributesStartElementNsHandler() and read back by
// parseAttributes().
// NOTE(review): the gotAttributes member used by those functions is not declared
// in the visible lines (embedded numbering skips 289) - confirm against the full file.
287 struct AttributeParseState {
// Attribute values keyed by qualified attribute name.
288 HashMap<String, String> attributes;
// Copies every attribute in `attrs` into state->attributes, keyed by qualified
// name, and sets state->gotAttributes.
// NOTE(review): the body of the `attrs.count() <= 0` test is not visible in this
// excerpt (embedded numbering skips 295-296).
292 static void attributesStartElementNsHandler(AttributeParseState* state, const QXmlStreamAttributes& attrs)
294 if (attrs.count() <= 0)
297 state->gotAttributes = true;
299 for (int i = 0; i < attrs.count(); i++) {
300 const QXmlStreamAttribute& attr = attrs[i];
// NOTE(review): attrLocalName and attrURI are not used by any visible line.
301 String attrLocalName = attr.name();
302 String attrValue = attr.value();
303 String attrURI = attr.namespaceUri();
304 String attrQName = attr.qualifiedName();
305 state->attributes.set(attrQName, attrValue);
// Parses a string of attribute text into a name -> value map.
// The text is spliced into a synthetic `<attrs ... />` element behind an XML
// declaration and fed to a local QXmlStreamReader; attributes of start elements
// are collected through attributesStartElementNsHandler().
// attrsOK is set to whether the handler recorded any attributes.
// NOTE(review): the statement that advances `stream` inside the loop is not
// visible in this excerpt (embedded numbering skips 318) - confirm against the full file.
309 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
311 AttributeParseState state;
312 state.gotAttributes = false;
314 QXmlStreamReader stream;
315 QString dummy = QString(QLatin1String("<?xml version=\"1.0\"?><attrs %1 />")).arg(string);
316 stream.addData(dummy);
317 while (!stream.atEnd()) {
319 if (stream.isStartElement()) {
320 attributesStartElementNsHandler(&state, stream.attributes());
323 attrsOK = state.gotAttributes;
324 return state.attributes;
// Extracts the prefix of a qualified name: the text to the left of the first ':'.
// NOTE(review): handling of names with no ':' (indexOf() result that is not a
// usable offset) would be on embedded lines 330-332, which are not visible here.
327 static inline String prefixFromQName(const QString& qName)
329 const int offset = qName.indexOf(QLatin1Char(':'));
333 return qName.left(offset);
// Mirrors the stream's namespace declarations onto newElement as attributes in
// the "http://www.w3.org/2000/xmlns/" namespace. A declaration with an empty
// prefix becomes "xmlns"; otherwise it becomes "xmlns:" followed by the prefix.
// `ec` receives the exception code from setAttributeNS().
// NOTE(review): the action taken when `ec` is set is not visible in this excerpt
// (embedded numbering skips 346).
336 static inline void handleElementNamespaces(Element* newElement, const QXmlStreamNamespaceDeclarations &ns,
337 ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
339 for (int i = 0; i < ns.count(); ++i) {
340 const QXmlStreamNamespaceDeclaration &decl = ns[i];
341 String namespaceURI = decl.namespaceUri();
// Appending an empty prefix leaves the bare "xmlns" name unchanged.
342 String namespaceQName = decl.prefix().isEmpty() ? String("xmlns") : String("xmlns:");
343 namespaceQName.append(decl.prefix());
344 newElement->setAttributeNS("http://www.w3.org/2000/xmlns/", namespaceQName, namespaceURI, ec, scriptingPermission);
345 if (ec) // exception setting attributes
// Copies each stream attribute onto newElement with setAttributeNS(), using the
// attribute's qualified name and value. An empty namespace URI is passed on as a
// null String. `ec` receives the exception code from setAttributeNS().
// NOTE(review): the action taken when `ec` is set is not visible in this excerpt
// (embedded numbering skips 361).
350 static inline void handleElementAttributes(Element* newElement, const QXmlStreamAttributes &attrs, ExceptionCode& ec,
351 FragmentScriptingPermission scriptingPermission)
353 for (int i = 0; i < attrs.count(); ++i) {
354 const QXmlStreamAttribute &attr = attrs[i];
// NOTE(review): attrLocalName is not used by any visible line.
355 String attrLocalName = attr.name();
356 String attrValue = attr.value();
357 String attrURI = attr.namespaceUri().isEmpty() ? String() : String(attr.namespaceUri());
358 String attrQName = attr.qualifiedName();
359 newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission);
360 if (ec) // exception setting attributes
// Main token loop. Runs while the parser is neither stopped nor paused and the
// stream is not at its end, dispatching on m_stream.tokenType().
// NOTE(review): most case bodies, the statement that advances the stream and the
// case terminators are not visible in this excerpt (large gaps in the embedded
// numbering). The comments below describe only the visible lines.
365 void XMLDocumentParser::parse()
367 while (!isStopped() && !m_parserPaused && !m_stream.atEnd()) {
369 switch (m_stream.tokenType()) {
370 case QXmlStreamReader::StartDocument: {
374 case QXmlStreamReader::EndDocument: {
378 case QXmlStreamReader::StartElement: {
// An XHTML-MP document that reaches an element without a DOCTYPE having been
// seen is reported as a fatal error.
380 if (document()->isXHTMLMPDocument() && !m_hasDocTypeDeclaration) {
381 handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber());
388 case QXmlStreamReader::EndElement: {
392 case QXmlStreamReader::Characters: {
// Character data is tested for being a CDATA section; the branch bodies are not visible.
393 if (m_stream.isCDATA()) {
402 case QXmlStreamReader::Comment: {
406 case QXmlStreamReader::DTD: {
407 //qDebug()<<"------------- DTD";
// Remember that a DOCTYPE was seen (checked in the StartElement case above).
410 m_hasDocTypeDeclaration = true;
414 case QXmlStreamReader::EntityReference: {
415 //qDebug()<<"---------- ENTITY = "<<m_stream.name().toString()
416 // <<", t = "<<m_stream.text().toString();
417 if (isXHTMLDocument()
419 || isXHTMLMPDocument()
// Decode the named entity and append it to the current text node.
// NOTE(review): the definition of `str` and the step taken when the current node
// is not a text node are not visible in this excerpt.
425 QString entity = m_stream.name().toString();
426 UChar c = decodeNamedEntity(entity.toUtf8().constData());
427 if (!m_currentNode->isTextNode())
429 ExceptionCode ec = 0;
431 // qDebug()<<" ------- adding entity "<<str;
432 static_cast<Text*>(m_currentNode)->appendData(str, ec);
436 case QXmlStreamReader::ProcessingInstruction: {
437 parseProcessingInstruction();
// Any stream error other than a premature end of document is reported through
// handleError(); the error type depends on whether it is a well-formedness error.
441 if (m_stream.error() != QXmlStreamReader::PrematureEndOfDocumentError) {
442 ErrorType type = (m_stream.error() == QXmlStreamReader::NotWellFormedError) ?
444 handleError(type, qPrintable(m_stream.errorString()), lineNumber(),
453 void XMLDocumentParser::startDocument()
455 initializeParserContext();
456 ExceptionCode ec = 0;
458 if (!m_parsingFragment) {
459 document()->setXMLStandalone(m_stream.isStandaloneDocument(), ec);
461 QStringRef version = m_stream.documentVersion();
462 if (!version.isEmpty())
463 document()->setXMLVersion(version, ec);
464 QStringRef encoding = m_stream.documentEncoding();
465 if (!encoding.isEmpty())
466 document()->setXMLEncoding(encoding);
// Handles a start-element token: creates the element, applies namespace
// declarations and attributes, appends it to the current node and makes it the
// new current node.
// NOTE(review): many lines are not visible in this excerpt (gaps in the embedded
// numbering), including early exits, error checks after the attribute helpers and
// preprocessor conditionals. The comments below describe only the visible lines.
470 void XMLDocumentParser::parseStartElement()
// The first element seen while parsing a fragment is the synthetic wrapper added
// by appendFragmentSource().
472 if (!m_sawFirstElement && m_parsingFragment) {
473 // skip dummy element for fragments
474 m_sawFirstElement = true;
480 String localName = m_stream.name();
481 String uri = m_stream.namespaceUri();
482 String prefix = prefixFromQName(m_stream.qualifiedName().toString());
// In fragment mode an element with no namespace takes the default namespace
// recorded by the fragment constructor.
484 if (m_parsingFragment && uri.isNull()) {
485 Q_ASSERT(prefix.isNull());
486 uri = m_defaultNamespaceURI;
489 QualifiedName qName(prefix, localName, uri);
490 RefPtr<Element> newElement = document()->createElement(qName, true);
497 if (!m_sawFirstElement && isXHTMLMPDocument()) {
498 // As per 7.1 section of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf,
499 // we should make sure that the root element MUST be 'html' and
500 // ensure the name of the default namespace on the root element 'html'
501 // MUST be 'http://www.w3.org/1999/xhtml'
502 if (localName != HTMLNames::htmlTag.localName()) {
503 handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber());
508 m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI;
509 uri = m_defaultNamespaceURI;
510 m_stream.addExtraNamespaceDeclaration(QXmlStreamNamespaceDeclaration(prefix, HTMLNames::xhtmlNamespaceURI));
// Capture "is this the first element" before the flag is set; it is used at the
// end of the function.
515 bool isFirstElement = !m_sawFirstElement;
516 m_sawFirstElement = true;
518 ExceptionCode ec = 0;
519 handleElementNamespaces(newElement.get(), m_stream.namespaceDeclarations(), ec, m_scriptingPermission);
525 handleElementAttributes(newElement.get(), m_stream.attributes(), ec, m_scriptingPermission);
// NOTE(review): the condition guarding the script start position assignment is
// not visible here (embedded numbering skips 532).
531 ScriptElement* scriptElement = toScriptElement(newElement.get());
533 m_scriptStartPosition = textPositionOneBased();
535 m_currentNode->deprecatedParserAddChild(newElement.get());
537 pushCurrentNode(newElement.get());
538 if (m_view && !newElement->attached())
539 newElement->attach();
541 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
542 if (newElement->hasTagName(HTMLNames::htmlTag))
543 static_cast<HTMLHtmlElement*>(newElement.get())->insertedByParser();
// Notify the frame loader once the first element exists and the document has a frame.
546 if (isFirstElement && document()->frame())
547 document()->frame()->loader()->dispatchDocumentElementAvailable();
// Handles an end-element token: finishes the current node and, when that node is
// a script element, either requests its external source or executes its inline
// content.
// NOTE(review): many lines are not visible in this excerpt (gaps in the embedded
// numbering), including the bodies of most of the early-exit tests and the
// else-branches of the script handling. The comments below describe only the
// visible lines.
550 void XMLDocumentParser::parseEndElement()
554 Node* n = m_currentNode;
555 n->finishParsingChildren();
// Script elements get separate handling when fragment scripting is not allowed
// (body not visible).
557 if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n))) {
564 if (!n->isElementNode() || !m_view) {
565 if (!m_currentNodeStack.isEmpty())
570 Element* element = static_cast<Element*>(n);
572 // The element's parent may have already been removed from document.
573 // Parsing continues in this case, but scripts aren't executed.
574 if (!element->inDocument()) {
579 ScriptElement* scriptElement = toScriptElement(element);
580 if (!scriptElement) {
585 // don't load external scripts for standalone documents (for now)
586 ASSERT(!m_pendingScript);
// m_requestingScript is set for the duration of the script handling and cleared
// on the last visible line.
587 m_requestingScript = true;
590 if (!scriptElement->shouldExecuteAsJavaScript())
591 document()->setShouldProcessNoscriptElement(true);
595 String scriptHref = scriptElement->sourceAttributeValue();
596 if (!scriptHref.isEmpty()) {
597 // we have a src attribute
598 String scriptCharset = scriptElement->scriptCharset();
// The request is only issued if the beforeload event dispatch returns true; when
// a pending script results, this parser registers itself as its client.
599 if (element->dispatchBeforeLoadEvent(scriptHref) &&
600 (m_pendingScript = document()->cachedResourceLoader()->requestScript(scriptHref, scriptCharset))) {
601 m_scriptElement = element;
602 m_pendingScript->addClient(this);
604 // m_pendingScript will be 0 if script was already loaded and ref() executed it
// Inline script content is executed with the document URL and the recorded
// script start position.
610 m_view->frame()->script()->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition));
612 m_requestingScript = false;
// Appends the stream's current text to the current node, treated as a Text node.
// NOTE(review): the step taken when the current node is not yet a text node is
// not visible in this excerpt (embedded numbering skips 619). The static_cast
// relies on that step - confirm against the full file.
616 void XMLDocumentParser::parseCharacters()
618 if (!m_currentNode->isTextNode())
620 ExceptionCode ec = 0;
621 static_cast<Text*>(m_currentNode)->appendData(m_stream.text(), ec);
// Handles a processing-instruction token: creates a ProcessingInstruction node
// from the stream's target and data, marks it as parser-created, appends it to
// the current node and finishes it. A PI seen before the first element that
// reports isXSL() sets m_sawXSLTransform.
// NOTE(review): the declaration of `exception`, the check on the creation result
// and the bodies of the two trailing `if` tests are not visible in this excerpt.
624 void XMLDocumentParser::parseProcessingInstruction()
628 // ### handle exceptions
630 RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction(
631 m_stream.processingInstructionTarget(),
632 m_stream.processingInstructionData(), exception);
636 pi->setCreatedByParser(true);
638 m_currentNode->deprecatedParserAddChild(pi.get());
639 if (m_view && !pi->attached())
642 pi->finishParsingChildren();
// Only a PI that appears before the first element can start an XSL transform.
645 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
646 if (m_sawXSLTransform && !document()->transformSourceDocument())
// Handles a CDATA token: creates a CDATASection from the stream's text and
// appends it to the current node.
// NOTE(review): the statements before node creation and the body of the trailing
// `if` are not visible in this excerpt (embedded numbering skips 652-654 and 659).
651 void XMLDocumentParser::parseCdata()
655 RefPtr<Node> newNode = CDATASection::create(document(), m_stream.text());
657 m_currentNode->deprecatedParserAddChild(newNode.get());
658 if (m_view && !newNode->attached())
// Handles a comment token: creates a Comment node from the stream's text and
// appends it to the current node.
// NOTE(review): the statements before node creation and the body of the trailing
// `if` are not visible in this excerpt (embedded numbering skips 663-665 and 670).
662 void XMLDocumentParser::parseComment()
666 RefPtr<Node> newNode = Comment::create(document(), m_stream.text());
668 m_currentNode->deprecatedParserAddChild(newNode.get());
669 if (m_view && !newNode->attached())
// Handles the end-of-document token by clearing the "DOCTYPE seen" flag.
// NOTE(review): embedded lines 675 and 677 are not visible; they may carry a
// preprocessor conditional around the assignment - confirm against the full file.
673 void XMLDocumentParser::endDocument()
676 m_hasDocTypeDeclaration = false;
680 bool XMLDocumentParser::hasError() const
682 return m_stream.hasError();
// Handles a DTD token. The public ID is compared against known XHTML, XHTML
// Mobile and WML identifiers to set the XHTML / XHTML-MP document flags or to
// report a fatal error. When a full document is being parsed, a DocumentType
// node is added to the document.
// NOTE(review): several lines are not visible in this excerpt (embedded
// numbering skips 699, 701-702, 704, 709-711, 714, 716-718, 725). They probably
// include preprocessor conditionals and block terminators, so the branch
// structure shown here is incomplete - confirm against the full file.
685 void XMLDocumentParser::parseDtd()
687 QStringRef name = m_stream.dtdName();
688 QStringRef publicId = m_stream.dtdPublicId();
689 QStringRef systemId = m_stream.dtdSystemId();
691 //qDebug() << dtd << name << publicId << systemId;
692 if ((publicId == QLatin1String("-//W3C//DTD XHTML 1.0 Transitional//EN"))
693 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.1//EN"))
694 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.0 Strict//EN"))
695 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.0 Frameset//EN"))
696 || (publicId == QLatin1String("-//W3C//DTD XHTML Basic 1.0//EN"))
697 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"))
698 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"))
700 || (publicId == QLatin1String("-//WAPFORUM//DTD XHTML Mobile 1.0//EN"))
703 setIsXHTMLDocument(true); // controls if we replace entities or not.
// NOTE(review): "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" is tested both in the
// condition above and in this else-if. As visible here, the second test would be
// unreachable unless the two are separated by conditionals that are not shown.
705 else if ((publicId == QLatin1String("-//WAPFORUM//DTD XHTML Mobile 1.1//EN"))
706 || (publicId == QLatin1String("-//WAPFORUM//DTD XHTML Mobile 1.0//EN"))) {
// The DOCTYPE name must match the 'html' tag's local name.
707 if (AtomicString(name) != HTMLNames::htmlTag.localName()) {
708 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber());
712 if (document()->isXHTMLMPDocument()) // check if the MIME type is correct with this method
713 setIsXHTMLMPDocument(true);
715 setIsXHTMLDocument(true);
// For WML documents, any public ID other than the four listed is a fatal error.
719 else if (document()->isWMLDocument()
720 && publicId != QLatin1String("-//WAPFORUM//DTD WML 1.3//EN")
721 && publicId != QLatin1String("-//WAPFORUM//DTD WML 1.2//EN")
722 && publicId != QLatin1String("-//WAPFORUM//DTD WML 1.1//EN")
723 && publicId != QLatin1String("-//WAPFORUM//DTD WML 1.0//EN"))
724 handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber());
// Fragments do not get a DocumentType node.
726 if (!m_parsingFragment)
727 document()->parserAddChild(DocumentType::create(document(), name, publicId, systemId));