2 This file is part of the KDE libraries
4 Copyright (C) 1997 Martin Jones (mjones@kde.org)
5 (C) 1997 Torben Weis (weis@kde.org)
6 (C) 1998 Waldo Bastian (bastian@kde.org)
7 (C) 1999 Lars Knoll (knoll@kde.org)
8 (C) 1999 Antti Koivisto (koivisto@kde.org)
9 (C) 2001 Dirk Mueller (mueller@kde.org)
10 Copyright (C) 2004, 2005, 2006 Apple Computer, Inc.
11 Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
13 This library is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Library General Public
15 License as published by the Free Software Foundation; either
16 version 2 of the License, or (at your option) any later version.
18 This library is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Library General Public License for more details.
23 You should have received a copy of the GNU Library General Public License
24 along with this library; see the file COPYING.LIB. If not, write to
25 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 Boston, MA 02111-1307, USA.
30 #include "HTMLTokenizer.h"
32 #include "CachedScript.h"
33 #include "DocLoader.h"
34 #include "DocumentFragment.h"
35 #include "EventNames.h"
37 #include "HTMLElement.h"
38 #include "SystemTime.h"
39 #include "csshelper.h"
40 #include "HTMLNames.h"
41 #include "HTMLParser.h"
42 #include "kjs_proxy.h"
44 #include "HTMLEntityNames.c"
46 // #define INSTRUMENT_LAYOUT_SCHEDULING 1
48 #define TOKENIZER_CHUNK_SIZE 4096
54 using namespace HTMLNames;
55 using namespace EventNames;
57 // FIXME: We would like this constant to be 200ms.
58 // Yielding more aggressively results in increased responsiveness and better incremental rendering.
59 // It slows down overall page-load on slower machines, though, so for now we set a value of 500.
60 const double tokenizerTimeDelay = 0.500;
// Literal prefixes the tokenizer matches against: the comment opener, and the
// close-tag prefixes used as "search stoppers" while scanning raw-text elements.
62 static const char commentStart [] = "<!--";
63 static const char scriptEnd [] = "</script";
64 static const char xmpEnd [] = "</xmp";
65 static const char styleEnd [] = "</style";
66 static const char textareaEnd [] = "</textarea";
67 static const char titleEnd [] = "</title";
// Allocate / release a raw array of N QChars through fastMalloc / fastFree.
69 #define KHTML_ALLOC_QCHAR_VEC( N ) (QChar*) fastMalloc( sizeof(QChar)*( N ) )
70 #define KHTML_DELETE_QCHAR_VEC( P ) fastFree((char*)( P ))
72 // Full support for MS Windows extensions to Latin-1.
73 // Technically these extensions should only be activated for pages
74 // marked "windows-1252" or "cp1252", but
75 // in the standard Microsoft way, these extensions infect hundreds of thousands
76 // of web pages. Note that people with non-latin-1 Microsoft extensions
79 // See: http://www.microsoft.com/globaldev/reference/WinCP.asp
80 // http://www.bbsinc.com/iso8859.html
81 // http://www.obviously.com/
83 // There may be better equivalents
85 // We only need this for entities. For non-entity text, we handle this in the text encoding.
// Replacement code points for the 0x80-0x9F range; fixUpChar() indexes this table
// with (code - 0x80). Entries equal to their own position (0x0081, 0x008D, 0x008F,
// 0x0090, 0x009D) leave the code point unchanged.
87 static const unsigned short windowsLatin1ExtensionArray[32] = {
88 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
89 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
90 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
91 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F
// Remaps a code point in the 0x80-0x9F range through windowsLatin1ExtensionArray.
// The mask test below is true for every code outside 0x80-0x9F.
// NOTE(review): the statement executed for codes outside that range is not visible in
// this extract (presumably the character is returned unchanged) - confirm.
94 static inline QChar fixUpChar(QChar c)
96 unsigned short code = c.unicode();
97 if ((code & ~0x1F) != 0x0080)
99 return windowsLatin1ExtensionArray[code - 0x80];
// Compares `length` characters of the char pattern s1 with the QChar run s2.
// Per the visible test, a position counts as a mismatch only when the s2 character
// equals neither the s1 character nor its toupper() form.
// NOTE(review): the definitions of c1/c2 and the return statements are not visible in
// this extract - confirm against the full source.
102 inline bool tagMatch(const char *s1, const QChar *s2, unsigned length)
104 for (unsigned i = 0; i != length; ++i) {
106 char uc1 = toupper(c1);
108 if (c1 != c2 && uc1 != c2)
// Adds the attribute attrName = v to this token.
// Attributes whose name is empty or is exactly "/" are ignored.
// `doc` is not used by any of the lines visible in this extract.
114 void Token::addAttribute(Document* doc, const AtomicString& attrName, const AtomicString& v)
117 if (!attrName.isEmpty() && attrName != "/") {
118 a = new MappedAttribute(attrName, v);
// NOTE(review): the condition guarding this allocation is not visible here
// (presumably the map is created only when attrs is still null) - confirm.
120 attrs = new NamedMappedAttrMap(0);
121 attrs->insertAttribute(a);
125 // ----------------------------------------------------------------------------
// Constructs a tokenizer for a whole document and creates the HTMLParser it feeds.
// Script-buffer bookkeeping starts at zero and the timer is bound to timerFired().
// (Part of the initializer list is not visible in this extract.)
127 HTMLTokenizer::HTMLTokenizer(Document* doc)
131 , scriptCodeMaxSize(0)
132 , scriptCodeResync(0)
133 , m_executingScript(0)
134 , m_timer(this, &HTMLTokenizer::timerFired)
139 parser = new HTMLParser(doc);
// Constructs a tokenizer that parses into a DocumentFragment; m_doc is taken from the
// fragment's owning document and the HTMLParser is created for the fragment.
// (Part of the initializer list is not visible in this extract.)
143 HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag)
147 , scriptCodeMaxSize(0)
148 , scriptCodeResync(0)
149 , m_executingScript(0)
150 , m_timer(this, &HTMLTokenizer::timerFired)
151 , m_doc(frag->document())
155 parser = new HTMLParser(frag);
// Returns the tokenizer to an idle state: empties the pending-script queue, releases
// the token buffer and the script buffer, zeroes the script-buffer bookkeeping and
// clears the allow-yield / force-synchronous flags.
// Must not be called while a script is executing (see the ASSERT).
159 void HTMLTokenizer::reset()
161 ASSERT(m_executingScript == 0);
// Drain every queued external script.
// NOTE(review): what is done with each dequeued CachedScript is not visible in this
// extract (presumably the tokenizer's reference is dropped) - confirm.
163 while (!pendingScripts.isEmpty()) {
164 CachedScript *cs = pendingScripts.dequeue();
165 ASSERT(cs->accessCount() > 0);
170 KHTML_DELETE_QCHAR_VEC(buffer);
175 KHTML_DELETE_QCHAR_VEC(scriptCode);
177 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
180 m_state.setAllowYield(false);
181 m_state.setForceSynchronous(false);
// Prepares the tokenizer for a new input stream: clears script-execution and
// external-script state, allocates the initial token buffer, and resets the entity
// state, the pending script source, the broken-comment/server flags and the
// force-synchronous flag.
186 void HTMLTokenizer::begin()
188 m_executingScript = 0;
189 m_state.setLoadingExtScript(false);
// Initial token buffer of 255 QChars.
192 buffer = KHTML_ALLOC_QCHAR_VEC( 255 );
196 m_state.setEntityState(NoEntity);
197 scriptSrc = DeprecatedString::null;
199 currentPrependingSrc = 0;
201 brokenComments = false;
202 brokenServer = false;
204 scriptStartLineno = 0;
206 m_state.setForceSynchronous(false);
// Sets the force-synchronous flag in the tokenizer state; continueProcessing() does
// not consider yielding while this flag is set.
209 void HTMLTokenizer::setForceSynchronous(bool force)
211 m_state.setForceSynchronous(force);
// Emits the text in `list` as preformatted text, tracking the skipLF / discardLF
// flags in `state` for line-break handling. `list` is taken by value and consumed here.
// Returns the updated state.
214 HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state)
216 // This function adds the listing 'list' as
217 // preformatted text-tokens to the token-collection
218 while (!list.isEmpty()) {
// A pending skipLF request (set after a line break below) is consumed first.
219 if (state.skipLF()) {
220 state.setSkipLF(false);
229 if (*list == '\n' || *list == '\r') {
230 if (state.discardLF())
232 state.setDiscardLF(false); // We have discarded 1 LF
236 /* Check for MS-DOS CRLF sequence */
238 state.setSkipLF(true);
// Any other character cancels a pending "discard first LF" request.
242 state.setDiscardLF(false);
// Scans the raw-text content of <script>, <style>, <textarea>, <title> or <xmp>.
// Exactly one of those in-element flags must be set in `state` (see the ASSERTs).
// Characters are accumulated in scriptCode until the close tag named by
// searchStopper / searchStopperLen is recognized; the accumulated text is then handed
// to scriptHandler() (for scripts) or processListing() (otherwise) and the in-element
// flags are cleared. Returns the updated state.
251 HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State state)
253 ASSERT(state.inTextArea() || state.inTitle() || !state.hasEntityState());
254 ASSERT(!state.hasTagState());
255 ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() == 1 );
// Remember the line the script starts on; it is later passed to scriptExecution().
256 if (state.inScript())
257 scriptStartLineno = lineno + src.lineCount();
// Resume a comment that was still open when the previous chunk ended.
259 if (state.inComment())
260 state = parseComment(src, state);
262 while ( !src.isEmpty() ) {
264 unsigned char ch = src->latin1();
// "<!-" is already at the end of scriptCode and the current char is '-': a comment
// opens here (not honoured inside textarea / xmp / title, or when brokenComments is set).
265 if (!scriptCodeResync && !brokenComments && !state.inTextArea() && !state.inXmp() && !state.inTitle() && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && scriptCode[scriptCodeSize-3] == '<' && scriptCode[scriptCodeSize-2] == '!' && scriptCode[scriptCodeSize-1] == '-') {
266 state.setInComment(true);
267 state = parseComment(src, state);
// The close tag was already matched (scriptCodeResync != 0) and its unquoted '>' has
// arrived: cut scriptCode back to just before the close tag and flush it.
270 if ( scriptCodeResync && !tquote && ( ch == '>' ) ) {
272 scriptCodeSize = scriptCodeResync-1;
273 scriptCodeResync = 0;
274 scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0;
275 if (state.inScript())
276 state = scriptHandler(state);
278 state = processListing(SegmentedString(scriptCode, scriptCodeSize), state);
// Prepare the matching end-tag token for the element that was just closed.
280 if (state.inStyle()) {
281 currToken.tagName = styleTag.localName();
282 currToken.beginTag = false;
283 } else if (state.inTextArea()) {
284 currToken.tagName = textareaTag.localName();
285 currToken.beginTag = false;
286 } else if (state.inTitle()) {
287 currToken.tagName = titleTag.localName();
288 currToken.beginTag = false;
289 } else if (state.inXmp()) {
290 currToken.tagName = xmpTag.localName();
291 currToken.beginTag = false;
294 state.setInStyle(false);
295 state.setInScript(false);
296 state.setInTextArea(false);
297 state.setInTitle(false);
298 state.setInXmp(false);
300 scriptCodeSize = scriptCodeResync = 0;
304 // possible end of tagname, let's check.
// A delimiter ('>', '/', or whitespace/control) follows text that matches the search
// stopper: record where the close tag starts (stored as index + 1 so 0 means "none").
305 if ( !scriptCodeResync && !state.escaped() && !src.escaped() && ( ch == '>' || ch == '/' || ch <= ' ' ) && ch &&
306 scriptCodeSize >= searchStopperLen &&
307 tagMatch( searchStopper, scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen )) {
308 scriptCodeResync = scriptCodeSize-searchStopperLen+1;
// While inside the candidate close tag, track quoting so a quoted '>' does not end it.
// (The tests selecting the two quote-toggle lines below are not visible in this extract.)
312 if ( scriptCodeResync && !state.escaped() ) {
314 tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
316 tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
317 else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
// A backslash escapes the next character unless it is itself escaped.
320 state.setEscaped(!state.escaped() && ch == '\\');
// Entities are expanded only inside <textarea> and <title>.
321 if (!scriptCodeResync && (state.inTextArea() || state.inTitle()) && !src.escaped() && ch == '&') {
322 QChar *scriptCodeDest = scriptCode+scriptCodeSize;
324 state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
325 scriptCodeSize = scriptCodeDest-scriptCode;
328 scriptCode[scriptCodeSize++] = *src;
// Handles the end of a <script> element. If the element named an external source
// (scriptSrc) and the document has a frame, the script is requested through the
// DocLoader and queued in pendingScripts; otherwise the accumulated inline text may be
// run through scriptExecution(). Source that arrives or is left over while a script is
// in flight is parked in pendingSrc / currentPrependingSrc and re-attached afterwards.
// Returns the updated state.
336 HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state)
338 // We are inside a <script>
339 bool doScriptExec = false;
341 // (Bugzilla 3837) Scripts following a frameset element should not execute or,
342 // in the case of extern scripts, even load.
343 bool followingFrameset = (parser->doc()->body() && parser->doc()->body()->hasTagName(framesetTag));
345 CachedScript* cs = 0;
346 // don't load external scripts for standalone documents (for now)
347 if (!scriptSrc.isEmpty() && parser->doc()->frame()) {
348 // forget what we just got; load from src url instead
349 if (!parser->skipMode() && !followingFrameset) {
350 #if INSTRUMENT_LAYOUT_SCHEDULING
351 if (!parser->doc()->ownerElement())
352 printf("Requesting script at time %d\n", parser->doc()->elapsedTime());
354 if ( (cs = parser->doc()->docLoader()->requestScript(scriptSrc, scriptSrcCharset) ))
355 pendingScripts.enqueue(cs);
360 scriptSrc=DeprecatedString::null;
364 kdDebug( 6036 ) << "---START SCRIPT---" << endl;
365 kdDebug( 6036 ) << DeprecatedString(scriptCode, scriptCodeSize) << endl;
366 kdDebug( 6036 ) << "---END SCRIPT---" << endl;
369 // Parse scriptCode containing <script> info
372 state = processListing(SegmentedString(scriptCode, scriptCodeSize), state);
// Snapshot the processed script text from the token buffer before it is reused.
373 DeprecatedString exScript( buffer, dest-buffer );
375 currToken.tagName = scriptTag.localName();
376 currToken.beginTag = false;
// Redirect source parked during this script into a local segment; the previous
// target is restored at the end of the function.
379 SegmentedString *savedPrependingSrc = currentPrependingSrc;
380 SegmentedString prependingSrc;
381 currentPrependingSrc = &prependingSrc;
382 if (!parser->skipMode() && !followingFrameset) {
// External-script path: park the remaining source until the script has loaded.
// (The test selecting this path is not visible in this extract.)
384 if (savedPrependingSrc)
385 savedPrependingSrc->append(src);
387 pendingSrc.prepend(src);
388 setSrc(SegmentedString());
389 scriptCodeSize = scriptCodeResync = 0;
391 // the ref() call below may call notifyFinished if the script is already in cache,
392 // and that mucks with the state directly, so we must write it back to the object.
396 // will be 0 if script was already loaded and ref() executed it
397 if (!pendingScripts.isEmpty())
398 state.setLoadingExtScript(true);
// Inline-script path: never taken for fragment tokenizers.
400 else if (!m_fragment && doScriptExec && javascript ) {
401 if (!m_executingScript)
402 pendingSrc.prepend(src);
405 setSrc(SegmentedString());
406 scriptCodeSize = scriptCodeResync = 0;
407 state = scriptExecution(exScript, state, DeprecatedString::null, scriptStartLineno);
411 state.setInScript(false);
412 scriptCodeSize = scriptCodeResync = 0;
// Nothing is running or loading any more: resume with the parked source.
414 if (!m_executingScript && !state.loadingExtScript()) {
415 src.append(pendingSrc);
417 } else if (!prependingSrc.isEmpty()) {
418 // restore first so that the write appends in the right place
419 // (does not hurt to do it again below)
420 currentPrependingSrc = savedPrependingSrc;
422 // we need to do this slightly modified bit of one of the write() cases
423 // because we want to prepend to pendingSrc rather than appending
424 // if there's no previous prependingSrc
425 if (state.loadingExtScript()) {
426 if (currentPrependingSrc) {
427 currentPrependingSrc->append(prependingSrc);
429 pendingSrc.prepend(prependingSrc);
433 write(prependingSrc, false);
438 currentPrependingSrc = savedPrependingSrc;
// Executes the script text `str` through the frame's executeScript().
//   str       - script source to run
//   state     - current tokenizer state (returned, possibly modified)
//   scriptURL - URL passed to executeScript(); when null the frame's document URL is used
//   baseLine  - line number passed to executeScript()
// The inScript flag is cleared for the duration of the call and restored afterwards,
// and allowYield is set once the script has run. Source parked while the script ran is
// re-attached at the end, mirroring scriptHandler().
443 HTMLTokenizer::State HTMLTokenizer::scriptExecution(const DeprecatedString& str, State state, DeprecatedString scriptURL, int baseLine)
// NOTE(review): the statement under this guard is not visible in this extract
// (presumably an early return for fragments / frameless documents) - confirm.
445 if (m_fragment || !m_doc->frame())
447 bool oldscript = state.inScript();
449 state.setInScript(false);
450 DeprecatedString url = scriptURL.isNull() ? m_doc->frame()->document()->URL() : scriptURL;
// Redirect source parked during execution into a local segment.
452 SegmentedString *savedPrependingSrc = currentPrependingSrc;
453 SegmentedString prependingSrc;
454 currentPrependingSrc = &prependingSrc;
456 #if INSTRUMENT_LAYOUT_SCHEDULING
457 if (!parser->doc()->ownerElement())
458 printf("beginning script execution at %d\n", parser->doc()->elapsedTime());
462 m_doc->frame()->executeScript(url,baseLine,0,str);
465 state.setAllowYield(true);
467 #if INSTRUMENT_LAYOUT_SCHEDULING
468 if (!parser->doc()->ownerElement())
469 printf("ending script execution at %d\n", parser->doc()->elapsedTime());
473 state.setInScript(oldscript);
// Nothing is running or loading any more: resume with the parked source.
475 if (!m_executingScript && !state.loadingExtScript()) {
476 src.append(pendingSrc);
478 } else if (!prependingSrc.isEmpty()) {
479 // restore first so that the write appends in the right place
480 // (does not hurt to do it again below)
481 currentPrependingSrc = savedPrependingSrc;
483 // we need to do this slightly modified bit of one of the write() cases
484 // because we want to prepend to pendingSrc rather than appending
485 // if there's no previous prependingSrc
486 if (state.loadingExtScript()) {
487 if (currentPrependingSrc)
488 currentPrependingSrc->append(prependingSrc);
490 pendingSrc.prepend(prependingSrc);
493 write(prependingSrc, false);
498 currentPrependingSrc = savedPrependingSrc;
// Consumes comment content from `src`, appending each character to scriptCode, until
// a comment terminator is recognized. On termination the inComment flag is cleared and
// the updated state is returned.
// In strict mode (document not in compat mode and not inside <script>/<style>) a '>'
// is only examined as a terminator while `canClose` is set; `canClose` is toggled each
// time the '-' delimiter count reaches two.
503 HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString &src, State state)
505 // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
506 bool strict = !parser->doc()->inCompatMode() && !state.inScript() && !state.inStyle();
507 int delimiterCount = 0;
508 bool canClose = false;
// Make sure scriptCode can hold everything that is left in src.
509 checkScriptBuffer(src.length());
510 while ( !src.isEmpty() ) {
511 scriptCode[ scriptCodeSize++ ] = *src;
512 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
513 qDebug("comment is now: *%s*",
514 QConstString((QChar*)src.operator->(), min(16U, src.length())).deprecatedString().latin1());
518 if (src->unicode() == '-') {
520 if (delimiterCount == 2) {
522 canClose = !canClose;
529 if ((!strict || canClose) && src->unicode() == '>') {
530 bool handleBrokenComments = brokenComments && !(state.inScript() || state.inStyle());
531 int endCharsCount = 1; // start off with one for the '>' character
533 // In quirks mode just check for -->
534 if (scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' && scriptCode[scriptCodeSize-2] == '-') {
537 else if (scriptCodeSize > 3 && scriptCode[scriptCodeSize-4] == '-' && scriptCode[scriptCodeSize-3] == '-' &&
538 scriptCode[scriptCodeSize-2] == '!') {
539 // Other browsers will accept --!> as a close comment, even though it's
540 // not technically valid.
// The comment ends if the delimiters balanced, broken-comment recovery applies, or
// one of the explicit terminators above was seen.
// NOTE(review): the lines that adjust endCharsCount are not visible in this extract.
544 if (canClose || handleBrokenComments || endCharsCount > 1) {
546 if (!(state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle())) {
547 #ifdef INCLUDE_COMMENTS_IN_DOM // FIXME: Turn this on soon.
549 scriptCode[scriptCodeSize] = 0;
550 scriptCode[scriptCodeSize + 1] = 0;
551 currToken.tagName = commentAtom;
552 currToken.beginTag = true;
553 state = processListing(SegmentedString(scriptCode, scriptCodeSize - endCharsCount), state);
555 currToken.tagName = commentAtom;
556 currToken.beginTag = false;
561 state.setInComment(false);
562 return state; // Finished parsing comment
// Consumes a server-side include block, appending each character to scriptCode until
// the two-character terminator "%>" is seen, then clears the inServer flag and returns
// the updated state.
571 HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state)
// Make sure scriptCode can hold everything that is left in src.
573 checkScriptBuffer(src.length());
574 while (!src.isEmpty()) {
575 scriptCode[scriptCodeSize++] = *src;
// '>' immediately preceded by '%' ends the block.
576 if (src->unicode() == '>' &&
577 scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {
579 state.setInServer(false);
581 return state; // Finished parsing server include
// Skips over a processing instruction. Single and double quotes toggle `tquote`;
// the instruction ends at a '>' that is either unquoted or directly preceded by '?'.
// On termination the inProcessingInstruction flag is cleared, discardLF is requested
// and the updated state is returned.
588 HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString &src, State state)
591 while ( !src.isEmpty() )
593 unsigned char chbegin = src->latin1();
594 if(chbegin == '\'') {
595 tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
597 else if(chbegin == '\"') {
598 tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
601 // some crappy sites omit the "?" before it, so
602 // we look for an unquoted '>' instead. (IE compatible)
603 else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) )
605 // We got a '?>' sequence
606 state.setInProcessingInstruction(false);
608 state.setDiscardLF(true);
609 return state; // Finished parsing the processing instruction
// Consumes plain text from `src`, tracking the skipLF flag for line-break handling.
// NOTE(review): most of this function's body is not visible in this extract; only the
// skipLF bookkeeping can be confirmed from here.
618 HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString &src, State state)
620 while (!src.isEmpty()) {
621 unsigned short cc = src->unicode();
623 if (state.skipLF()) {
624 state.setSkipLF(false);
631 // do we need to enlarge the buffer?
635 state.setSkipLF(true);
// Parses a character reference as a small state machine driven by state.entityState().
//   src        - input; a decoded BMP character is pushed back onto it
//   dest       - output cursor; receives the raw characters when the reference is rejected
//   state      - tokenizer state (returned, ending in NoEntity once a reference is resolved)
//   cBufferPos - number of reference characters collected so far in cBuffer
//   start      - NOTE(review): presumably selects the initialisation below; the guarding
//                condition is not visible in this extract
//   parsingTag - true when called for an attribute value; tightens named-entity acceptance
// The numeric value being built is kept in EntityUnicodeValue.
646 HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, QChar *&dest, State state, unsigned &cBufferPos, bool start, bool parsingTag)
651 state.setEntityState(SearchEntity);
652 EntityUnicodeValue = 0;
655 while(!src.isEmpty())
657 unsigned short cc = src->unicode();
658 switch(state.entityState()) {
660 ASSERT(state.entityState() != NoEntity);
// First character after '&' decides between a numeric and a named reference.
665 cBuffer[cBufferPos++] = cc;
667 state.setEntityState(NumericSearch);
670 state.setEntityState(EntityName);
// Numeric reference: 'x'/'X' selects hexadecimal, a digit selects decimal.
675 if(cc == 'x' || cc == 'X') {
676 cBuffer[cBufferPos++] = cc;
678 state.setEntityState(Hexadecimal);
680 else if(cc >= '0' && cc <= '9')
681 state.setEntityState(Decimal);
683 state.setEntityState(SearchSemicolon);
// Hexadecimal digits: collect until cBuffer holds 10 characters.
689 int ll = min(src.length(), 10-cBufferPos);
691 QChar csrc(src->lower());
694 if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) {
695 state.setEntityState(SearchSemicolon);
698 EntityUnicodeValue = EntityUnicodeValue*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
699 cBuffer[cBufferPos++] = cc;
702 if (cBufferPos == 10)
703 state.setEntityState(SearchSemicolon);
// Decimal digits: collect until cBuffer holds 9 characters.
708 int ll = min(src.length(), 9-cBufferPos);
712 if(src->row() || !(cc >= '0' && cc <= '9')) {
713 state.setEntityState(SearchSemicolon);
717 EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
718 cBuffer[cBufferPos++] = cc;
722 state.setEntityState(SearchSemicolon);
// Named entity: collect ASCII letters and digits until cBuffer holds 9 characters.
727 int ll = min(src.length(), 9-cBufferPos);
732 if(csrc.row() || !((cc >= 'a' && cc <= 'z') ||
733 (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
734 state.setEntityState(SearchSemicolon);
738 cBuffer[cBufferPos++] = cc;
742 state.setEntityState(SearchSemicolon);
// Name complete: look it up in the entity table.
743 if (state.entityState() == SearchSemicolon) {
745 const Entity *e = findEntity(cBuffer, cBufferPos);
747 EntityUnicodeValue = e->code;
// Inside a tag, a named entity above 255 is only accepted when terminated by ';'.
750 if(parsingTag && EntityUnicodeValue > 255 && *src != ';')
751 EntityUnicodeValue = 0;
757 case SearchSemicolon:
758 // Don't allow values that are more than 21 bits.
// NOTE(review): values 0x110000-0x1FFFFF pass this check although they lie above the
// highest Unicode code point (0x10FFFF); for them the surrogate formula below yields a
// first code unit >= 0xDC00, i.e. not a valid leading surrogate - confirm whether the
// bound should be 0x10FFFF.
759 if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x1FFFFF) {
// BMP value: remap through fixUpChar() and push it back onto the input.
764 if (EntityUnicodeValue <= 0xFFFF) {
766 src.push(fixUpChar(EntityUnicodeValue));
768 // Convert to UTF-16, using surrogate code points.
769 QChar c1(0xD800 | (((EntityUnicodeValue >> 16) - 1) << 6) | ((EntityUnicodeValue >> 10) & 0x3F));
770 QChar c2(0xDC00 | (EntityUnicodeValue & 0x3FF));
777 // ignore the sequence, add it to the buffer as plaintext
779 for(unsigned int i = 0; i < cBufferPos; i++)
780 dest[i] = cBuffer[i];
784 state.setEntityState(NoEntity);
// Parses one tag from `src` as a state machine driven by state.tagState():
// tag name -> attribute search -> attribute name -> '=' search -> value (quoted or
// unquoted) -> end of tag. Attributes are added to currToken as they complete. When the
// tag ends, the token is handed to processToken() and, for <script>, <style>,
// <textarea>, <title> and <xmp> start tags, the matching raw-text mode is entered via
// parseSpecial(). cBufferPos is a local copy of m_cBufferPos that is written back on
// every exit path. Returns the updated state.
// NOTE(review): most `case` labels are not visible in this extract; the section comments
// below name a state only where a debug string or state transition identifies it.
792 HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
794 ASSERT(!state.hasEntityState());
796 unsigned cBufferPos = m_cBufferPos;
798 while (!src.isEmpty())
801 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
803 while(l < src.length() && (*(src.operator->()+l)).latin1() != '>')
805 qDebug("src is now: *%s*, tquote: %d",
806 QConstString((QChar*)src.operator->(), l).deprecatedString().latin1(), tquote);
808 switch(state.tagState()) {
811 m_cBufferPos = cBufferPos;
816 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
// --- Tag name: while collecting it, also watch for the "<!--" comment opener.
821 if (*src == commentStart[searchCount])
824 if (searchCount == 4)
827 kdDebug( 6036 ) << "Found comment" << endl;
829 // Found '<!--' sequence
831 dest = buffer; // ignore the previous part of this tag
832 state.setInComment(true);
833 state.setTagState(NoTag);
835 // Fix bug 34302 at kde.bugs.org. Go ahead and treat
836 // <!--> as a valid comment, since both mozilla and IE on windows
837 // can handle this case. Only do this in quirks mode. -dwh
838 if (!src.isEmpty() && *src == '>' && parser->doc()->inCompatMode()) {
839 state.setInComment(false);
842 cBuffer[cBufferPos++] = src->cell();
845 state = parseComment(src, state);
847 m_cBufferPos = cBufferPos;
848 return state; // Finished parsing tag!
850 // cuts off the high part, which is okay
851 cBuffer[cBufferPos++] = src->cell();
856 searchCount = 0; // Stop looking for '<!--' sequence
// Collect the tag name into cBuffer, lower-casing ASCII letters, up to CBUFLEN chars.
860 unsigned int ll = min(src.length(), CBUFLEN-cBufferPos);
862 unsigned short curchar = src->unicode();
863 if(curchar <= ' ' || curchar == '>' ) {
868 // tolower() shows up on profiles. This is faster!
869 if (curchar >= 'A' && curchar <= 'Z')
870 cBuffer[cBufferPos++] = curchar + ('a' - 'A');
872 cBuffer[cBufferPos++] = curchar;
876 // Disadvantage: we add the possible rest of the tag
877 // as attribute names. ### judge if this causes problems
878 if(finish || CBUFLEN == cBufferPos) {
881 unsigned int len = cBufferPos;
882 cBuffer[cBufferPos] = '\0';
883 if ((cBufferPos > 0) && (*ptr == '/')) {
893 // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/".
894 if (len > 1 && ptr[len-1] == '/')
897 // Now that we've shaved off any invalid / that might have followed the name, make the tag.
898 // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
900 currToken.tagName = AtomicString(ptr);
901 currToken.beginTag = beginTag;
904 state.setTagState(SearchAttribute);
// --- SearchAttribute: skip whitespace and stray quotes until an attribute name or
// the end of the tag starts.
909 case SearchAttribute:
910 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
911 qDebug("SearchAttribute");
913 while(!src.isEmpty()) {
914 unsigned short curchar = src->unicode();
915 // In this mode just ignore any quotes we encounter and treat them like spaces.
916 if (curchar > ' ' && curchar != '\'' && curchar != '"') {
917 if (curchar == '<' || curchar == '>')
918 state.setTagState(SearchEnd);
920 state.setTagState(AttributeName);
// --- AttributeName: collect the lower-cased name into cBuffer.
930 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
931 qDebug("AttributeName");
933 int ll = min(src.length(), CBUFLEN-cBufferPos);
935 unsigned short curchar = src->unicode();
// The name ends at '=', '>' or any whitespace/control character.
936 if (curchar <= '>' && (curchar >= '=' || curchar <= ' ')) {
937 cBuffer[cBufferPos] = '\0';
938 attrName = AtomicString(cBuffer);
941 state.setTagState(SearchEqual);
942 // This is a deliberate quirk to match Mozilla and Opera. We have to do this
943 // since sites that use the "standards-compliant" path sometimes send
944 // <script src="foo.js"/>. Both Moz and Opera will honor this, despite it
945 // being bogus HTML. They do not honor the "/" for other tags. This behavior
946 // also deviates from WinIE, but in this case we'll just copy Moz and Opera.
947 if (currToken.tagName == scriptTag && curchar == '>' && attrName == "/")
948 currToken.flat = true;
952 // tolower() shows up on profiles. This is faster!
953 if (curchar >= 'A' && curchar <= 'Z')
954 cBuffer[cBufferPos++] = curchar + ('a' - 'A');
956 cBuffer[cBufferPos++] = curchar;
// cBuffer is full: treat what has been collected as the complete name.
959 if ( cBufferPos == CBUFLEN ) {
960 cBuffer[cBufferPos] = '\0';
961 attrName = AtomicString(cBuffer);
964 state.setTagState(SearchEqual);
// --- SearchEqual: look for '=' after the name; otherwise the attribute has no value.
969 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
970 qDebug("SearchEqual");
972 while(!src.isEmpty()) {
973 unsigned short curchar = src->unicode();
974 // In this mode just ignore any quotes we encounter and treat them like spaces.
975 if (curchar > ' ' && curchar != '\'' && curchar != '"') {
978 kdDebug(6036) << "found equal" << endl;
980 state.setTagState(SearchValue);
// No '=' followed the name: record the attribute with an empty value.
984 currToken.addAttribute(parser->doc(), attrName, emptyAtom);
986 state.setTagState(SearchAttribute);
// --- Value search: an opening quote selects QuotedValue, anything else selects Value.
994 while(!src.isEmpty()) {
995 unsigned short curchar = src->unicode();
997 if(( curchar == '\'' || curchar == '\"' )) {
998 tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
999 state.setTagState(QuotedValue);
1002 state.setTagState(Value);
// --- QuotedValue: collect until the matching closing quote.
1010 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1011 qDebug("QuotedValue");
1013 while(!src.isEmpty()) {
1016 unsigned short curchar = src->unicode();
1017 if (curchar == '>' && attrName.isEmpty()) {
1018 // Handle a case like <img '>. Just go ahead and be willing
1019 // to close the whole tag. Don't consume the character and
1020 // just go back into SearchEnd while ignoring the whole
1022 // FIXME: Note that this is actually not a very good solution. It's
1023 // an interim hack and doesn't handle the general case of
1024 // unmatched quotes among attributes that have names. -dwh
1025 while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
1026 dest--; // remove trailing newlines
1027 AtomicString v(buffer+1, dest-buffer-1);
1028 attrName = v; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
1029 currToken.addAttribute(parser->doc(), attrName, v);
1030 state.setTagState(SearchAttribute);
// Cheap pre-filter: '&', '"' and '\'' all have code points <= '\''.
1036 if(curchar <= '\'' && !src.escaped()) {
1037 // ### attributes like '&{blaa....};' are supposed to be treated as jscript.
1038 if ( curchar == '&' )
1041 state = parseEntity(src, dest, state, cBufferPos, true, true);
1044 else if ( (tquote == SingleQuote && curchar == '\'') ||
1045 (tquote == DoubleQuote && curchar == '\"') )
1047 // some <input type=hidden> rely on trailing spaces. argh
1048 while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
1049 dest--; // remove trailing newlines
1050 AtomicString v(buffer+1, dest-buffer-1);
1051 if (attrName.isEmpty())
1052 attrName = v; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
1053 currToken.addAttribute(parser->doc(), attrName, v);
1056 state.setTagState(SearchAttribute);
// --- Unquoted value: collect until whitespace or '>'.
1067 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1070 while(!src.isEmpty()) {
1072 unsigned short curchar = src->unicode();
1073 if(curchar <= '>' && !src.escaped()) {
1075 if ( curchar == '&' )
1078 state = parseEntity(src, dest, state, cBufferPos, true, true);
1081 // no quotes. Every space means end of value
1082 // '/' does not delimit in IE!
1083 if ( curchar <= ' ' || curchar == '>' )
1085 AtomicString v(buffer+1, dest-buffer-1);
1086 currToken.addAttribute(parser->doc(), attrName, v);
1088 state.setTagState(SearchAttribute);
// --- SearchEnd: skip to the closing '>' (or a stray '<'), then emit the token.
1099 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1100 qDebug("SearchEnd");
1102 while(!src.isEmpty()) {
1103 if (*src == '>' || *src == '<')
1107 currToken.flat = true;
1111 if (src.isEmpty()) break;
1113 searchCount = 0; // Stop looking for '<!--' sequence
1114 state.setTagState(NoTag);
1120 if (currToken.tagName == nullAtom) { //stop if tag is unknown
1121 m_cBufferPos = cBufferPos;
1125 AtomicString tagName = currToken.tagName;
1126 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0
1127 kdDebug( 6036 ) << "appending Tag: " << tagName.deprecatedString() << endl;
1130 // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
1132 bool isSelfClosingScript = currToken.flat && currToken.beginTag && currToken.tagName == scriptTag;
1133 bool beginTag = !currToken.flat && currToken.beginTag;
// For a <script> start tag, read src / charset / type / language before the token is
// processed, to decide what (if anything) will be loaded and executed.
1134 if (currToken.beginTag && currToken.tagName == scriptTag) {
1136 bool foundTypeAttribute = false;
1137 scriptSrc = DeprecatedString::null;
1138 scriptSrcCharset = DeprecatedString::null;
1139 if ( currToken.attrs && /* potentially have a ATTR_SRC ? */
1140 parser->doc()->frame() &&
1141 parser->doc()->frame()->jScriptEnabled() && /* jscript allowed at all? */
1142 !m_fragment /* are we a regular tokenizer or just for innerHTML ? */
1144 if ((a = currToken.attrs->getAttributeItem(srcAttr)))
1145 scriptSrc = parser->doc()->completeURL(parseURL(a->value()).deprecatedString());
1146 if ((a = currToken.attrs->getAttributeItem(charsetAttr)))
1147 scriptSrcCharset = a->value().deprecatedString().stripWhiteSpace();
// Without an explicit charset the script uses the frame's encoding.
1148 if ( scriptSrcCharset.isEmpty() )
1149 scriptSrcCharset = parser->doc()->frame()->encoding();
1150 /* Check type before language, since language is deprecated */
1151 if ((a = currToken.attrs->getAttributeItem(typeAttr)) != 0 && !a->value().isEmpty())
1152 foundTypeAttribute = true;
1154 a = currToken.attrs->getAttributeItem(languageAttr);
1158 if( foundTypeAttribute ) {
1160 Mozilla 1.5 accepts application/x-javascript, and some web references claim it is the only
1161 correct variation, but WinIE 6 doesn't accept it.
1162 Neither Mozilla 1.5 nor WinIE 6 accept application/javascript, application/ecmascript, or
1163 application/x-ecmascript.
1164 Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does.
1165 Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does.
1166 Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't.
1167 Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string.
1168 We want to accept all the values that either of these browsers accept, but not other values.
1170 DeprecatedString type = a->value().deprecatedString().stripWhiteSpace().lower();
1171 if( type.compare("application/x-javascript") != 0 &&
1172 type.compare("text/javascript") != 0 &&
1173 type.compare("text/javascript1.0") != 0 &&
1174 type.compare("text/javascript1.1") != 0 &&
1175 type.compare("text/javascript1.2") != 0 &&
1176 type.compare("text/javascript1.3") != 0 &&
1177 type.compare("text/javascript1.4") != 0 &&
1178 type.compare("text/javascript1.5") != 0 &&
1179 type.compare("text/jscript") != 0 &&
1180 type.compare("text/ecmascript") != 0 &&
1181 type.compare("text/livescript") )
1185 Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does.
1186 Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3.
1187 Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace.
1188 We want to accept all the values that either of these browsers accept, but not other values.
1190 String lang = a->value().domString().lower();
1192 lang != "javascript" &&
1193 lang != "javascript1.0" &&
1194 lang != "javascript1.1" &&
1195 lang != "javascript1.2" &&
1196 lang != "javascript1.3" &&
1197 lang != "javascript1.4" &&
1198 lang != "javascript1.5" &&
1199 lang != "ecmascript" &&
1200 lang != "livescript" &&
// Hand the finished token to the parser.
1206 RefPtr<Node> n = processToken();
// Tags whose content needs special tokenizing. The searchStopperLen values below are
// the character counts of the corresponding close-tag prefixes.
1208 if (tagName == preTag || tagName == listingTag) {
1210 state.setDiscardLF(true); // Discard the first LF after we open a pre.
1211 } else if (tagName == scriptTag) {
1212 ASSERT(!scriptNode);
1215 searchStopper = scriptEnd;
1216 searchStopperLen = 8;
1217 state.setInScript(true);
1218 state = parseSpecial(src, state);
1219 } else if (isSelfClosingScript) { // Handle <script src="foo"/>
1220 state.setInScript(true);
1221 state = scriptHandler(state);
1223 } else if (tagName == styleTag) {
1225 searchStopper = styleEnd;
1226 searchStopperLen = 7;
1227 state.setInStyle(true);
1228 state = parseSpecial(src, state);
1230 } else if (tagName == textareaTag) {
1232 searchStopper = textareaEnd;
1233 searchStopperLen = 10;
1234 state.setInTextArea(true);
1235 state = parseSpecial(src, state);
1237 } else if (tagName == titleTag) {
1239 searchStopper = titleEnd;
1240 searchStopperLen = 7;
// Snapshot state, source and line number so the title can be re-tokenized if no
// </title> is found.
1241 State savedState = state;
1242 SegmentedString savedSrc = src;
1243 long savedLineno = lineno;
1244 state.setInTitle(true);
1245 state = parseSpecial(src, state);
1246 if (state.inTitle() && src.isEmpty()) {
1247 // We just ate the rest of the document as the title #text node!
1248 // Reset the state then retokenize without special title handling.
1249 // Let the parser clean up the missing </title> tag.
1250 // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
1251 // at the end of the document unless noMoreData is also true. We need
1252 // to detect this case elsewhere, and save the state somewhere other
1253 // than a local variable.
1256 lineno = savedLineno;
1260 } else if (tagName == xmpTag) {
1262 searchStopper = xmpEnd;
1263 searchStopperLen = 5;
1264 state.setInXmp(true);
1265 state = parseSpecial(src, state);
1267 } else if (tagName == selectTag)
1268 state.setInSelect(beginTag);
1269 else if (tagName == plaintextTag)
1270 state.setInPlainText(beginTag);
1271 m_cBufferPos = cBufferPos;
1272 return state; // Finished parsing tag!
1276 m_cBufferPos = cBufferPos;
// Decides whether the tokenizing loop may keep consuming input in the current pass.
// The state's allow-yield flag is read and then cleared. A time check is made only when
// no external script is loading, parsing is not forced synchronous, no script is executing,
// and either more than TOKENIZER_CHUNK_SIZE characters have been counted in processedCount
// or a yield was allowed. If more than tokenizerTimeDelay seconds have passed since
// startTime, a zero-delay one-shot timer is started so that processing resumes later.
// NOTE(review): the bool result presumably tells the caller whether to go on (write() tests
// it at the top of its loop) -- confirm against the return statements.
1280 inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state)
1282 // We don't want to be checking elapsed time with every character, so we only check after we've
1283 // processed a certain number of characters.
1284 bool allowedYield = state.allowYield();
1285 state.setAllowYield(false);
1286 if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > TOKENIZER_CHUNK_SIZE || allowedYield)) {
1288 if (currentTime() - startTime > tokenizerTimeDelay) {
1289 /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
1290 load, but this hurts overall performance on slower machines. For now turn this
1292 || (!parser->doc()->haveStylesheetsLoaded() &&
1293 (parser->doc()->documentElement()->id() != ID_HTML || parser->doc()->body()))) {*/
1294 // Schedule the timer to keep processing as soon as possible.
1295 m_timer.startOneShot(0);
1296 #if INSTRUMENT_LAYOUT_SCHEDULING
1297 if (currentTime() - startTime > tokenizerTimeDelay)
1298 printf("Deferring processing of data because 500ms elapsed away from event loop.\n");
// Feeds a chunk of input to the tokenizer and runs the main tokenizing loop.
// While a script is executing (with appendData set) or external scripts are pending,
// the input is only queued: appended to currentPrependingSrc when that is set, and to
// pendingSrc as well (NOTE(review): possibly only as the alternative -- confirm).
// Otherwise the loop runs for as long as src has data and the frame (if any) has no
// scheduled location change pending, handing each character to the parse routine that
// matches the current state.
// Judging by timerFired(), the bool result reports whether end() was called during this
// write -- NOTE(review): confirm against the return statements.
1308 bool HTMLTokenizer::write(const SegmentedString &str, bool appendData)
1311 kdDebug( 6036 ) << this << " Tokenizer::write(\"" << str.toString() << "\"," << appendData << ")" << endl;
1317 if (m_parserStopped)
1320 if ( ( m_executingScript && appendData ) || !pendingScripts.isEmpty() ) {
1321 // don't parse; we will do this later
1322 if (currentPrependingSrc) {
1323 currentPrependingSrc->append(str);
1325 pendingSrc.append(str);
1335 // Once a timer is set, it has control of when the tokenizer continues.
1336 if (m_timer.isActive())
// Remember the incoming inWrite value; it is assigned back to inWrite after the loop.
1339 bool wasInWrite = inWrite;
1342 #if INSTRUMENT_LAYOUT_SCHEDULING
1343 if (!parser->doc()->ownerElement())
1344 printf("Beginning write at time %d\n", parser->doc()->elapsedTime());
// Inputs for continueProcessing(): a character counter and the time this pass started.
1347 int processedCount = 0;
1348 double startTime = currentTime();
1350 Frame *frame = parser->doc()->frame();
// The loop works on a local State initialised from m_state.
1352 State state = m_state;
1354 while (!src.isEmpty() && (!frame || !frame->isScheduledLocationChangePending())) {
1355 if (!continueProcessing(processedCount, startTime, state))
1358 // do we need to enlarge the buffer?
1361 unsigned short cc = src->unicode();
1363 bool wasSkipLF = state.skipLF();
1365 state.setSkipLF(false);
1367 if (wasSkipLF && (cc == '\n'))
1369 else if (state.needsSpecialWriteHandling()) {
1370 // it's important to keep needsSpecialWriteHandling with the flags this block tests
1371 if (state.hasEntityState())
1372 state = parseEntity(src, dest, state, m_cBufferPos, false, state.hasTagState());
1373 else if (state.inPlainText())
1374 state = parseText(src, state);
1375 else if (state.inAnySpecial())
1376 state = parseSpecial(src, state);
1377 else if (state.inComment())
1378 state = parseComment(src, state);
1379 else if (state.inServer())
1380 state = parseServer(src, state);
1381 else if (state.inProcessingInstruction())
1382 state = parseProcessingInstruction(src, state);
1383 else if (state.hasTagState())
1384 state = parseTag(src, state);
1385 else if (state.startTag()) {
1386 state.setStartTag(false);
1393 searchCount = 1; // Look for '<!--' sequence to start comment
1398 // xml processing instruction
1399 state.setInProcessingInstruction(true);
1401 state = parseProcessingInstruction(src, state);
1407 if (!brokenServer) {
1408 // <% server stuff, handle as comment %>
1409 state.setInServer(true);
1411 state = parseServer(src, state);
1414 // else fall through
1416 if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
1417 // Start of a Start-Tag
1431 state.setTagState(TagName);
1432 state = parseTag(src, state);
1434 } else if (cc == '&' && !src.escaped()) {
1436 state = parseEntity(src, dest, state, m_cBufferPos, true, state.hasTagState());
1437 } else if (cc == '<' && !src.escaped()) {
1438 tagStartLineno = lineno+src.lineCount();
1440 state.setStartTag(true);
1441 } else if (cc == '\n' || cc == '\r') {
1442 if (state.discardLF())
1444 state.setDiscardLF(false); // We have discarded 1 LF
1449 /* Check for MS-DOS CRLF sequence */
1451 state.setSkipLF(true);
1454 state.setDiscardLF(false);
1460 #if INSTRUMENT_LAYOUT_SCHEDULING
1461 if (!parser->doc()->ownerElement())
1462 printf("Ending write at time %d\n", parser->doc()->elapsedTime());
1465 inWrite = wasInWrite;
// Once all data has arrived and nothing is outstanding (no enclosing write, no external
// script load, no executing script, no active timer), finish up via end().
1469 if (noMoreData && !inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) {
1470 end(); // this actually causes us to be deleted
// Stops parsing: defers to Tokenizer::stopParsing() and then, for a non-fragment
// tokenizer whose document has a frame, notifies that frame through
// tokenizerProcessedData().
1476 void HTMLTokenizer::stopParsing()
1478 Tokenizer::stopParsing();
1481 // The part needs to know that the tokenizer has finished with its data,
1482 // regardless of whether it happened naturally or due to manual intervention.
1483 if (!m_fragment && m_doc->frame())
1484 m_doc->frame()->tokenizerProcessedData();
// Reports whether m_timer is active, i.e. whether a deferred continuation of
// tokenizing (scheduled by continueProcessing() or timerFired()) is outstanding.
1487 bool HTMLTokenizer::processingData() const
1489 return m_timer.isActive();
// Timer callback that resumes tokenizing after a yield.
// If the document's view has a layout pending and minimumLayoutDelay() is zero, the
// timer is re-armed with a zero delay so that layout goes first.
// NOTE(review): presumably returns right after re-arming -- confirm.
// Otherwise write() is invoked with an empty SegmentedString; when it reports that
// end() was called and a frame is available, the frame is notified through
// tokenizerProcessedData().
1492 void HTMLTokenizer::timerFired(Timer<HTMLTokenizer>*)
1494 #if INSTRUMENT_LAYOUT_SCHEDULING
1495 if (!parser->doc()->ownerElement())
1496 printf("Beginning timer write at time %d\n", parser->doc()->elapsedTime());
1499 if (parser->doc()->view() && parser->doc()->view()->layoutPending() && !parser->doc()->minimumLayoutDelay()) {
1500 // Restart the timer and let layout win. This is basically a way of ensuring that the layout
1501 // timer has higher priority than our timer.
1502 m_timer.startOneShot(0);
// Fetch the frame before calling write(): end() is commented elsewhere in this file as
// "actually causes us to be deleted", so members are presumably unsafe to touch
// afterwards. Fragment tokenizers use a null frame.
1506 RefPtr<Frame> frame = m_fragment ? 0 : m_doc->frame();
1508 // Invoke write() as though more data came in.
1509 bool didCallEnd = write(SegmentedString(), true);
1511 // If we called end() during the write, we need to let WebKit know that we're done processing the data.
1512 if (didCallEnd && frame)
1513 frame->tokenizerProcessedData();
// Finishes tokenizing. Expects the timer to be inactive (asserted) but stops it anyway,
// then passes scriptCode and buffer to KHTML_DELETE_QCHAR_VEC and resets the script
// buffer bookkeeping (scriptCodeSize, scriptCodeMaxSize, scriptCodeResync) to zero.
1516 void HTMLTokenizer::end()
1518 ASSERT(!m_timer.isActive());
1519 m_timer.stop(); // Only helps if assertion above fires, but do it anyway.
1522 // parseTag is using the buffer for different matters
1523 if (!m_state.hasTagState())
1527 KHTML_DELETE_QCHAR_VEC(scriptCode);
1529 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
1531 KHTML_DELETE_QCHAR_VEC(buffer);
// Called when no further input will arrive (see the comment near the end).
// While the state is still inside a comment or server block and scriptCode holds
// buffered text, the unmatched construct is flagged through brokenComments / brokenServer,
// the buffered text is copied into the local string `food`, scriptCode is released and
// its bookkeeping zeroed, and the in-comment / in-server flags are cleared.
// Finally end() is called unless a write is in progress, an external script is loading,
// a script is executing, or the timer is active.
1538 void HTMLTokenizer::finish()
1540 // do this as long as we don't find matching comment ends
1541 while((m_state.inComment() || m_state.inServer()) && scriptCode && scriptCodeSize) {
1542 // we've found an unmatched comment start
1543 if (m_state.inComment())
1544 brokenComments = true;
1546 brokenServer = true;
1547 checkScriptBuffer();
// Two zero QChars are written at the end of the buffered text.
1548 scriptCode[scriptCodeSize] = 0;
1549 scriptCode[scriptCodeSize + 1] = 0;
1551 DeprecatedString food;
1552 if (m_state.inScript() || m_state.inStyle())
1553 food.setUnicode(scriptCode, scriptCodeSize);
1554 else if (m_state.inServer()) {
1556 food += DeprecatedString(scriptCode, scriptCodeSize);
// Keep only the text that follows the first '>' in the buffered data.
1558 pos = QConstString(scriptCode, scriptCodeSize).string().find('>');
1559 food.setUnicode(scriptCode+pos+1, scriptCodeSize-pos-1); // deep copy
1561 KHTML_DELETE_QCHAR_VEC(scriptCode);
1563 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
1564 m_state.setInComment(false);
1565 m_state.setInServer(false);
1566 if (!food.isEmpty())
1569 // this indicates we will not receive any more data... but if we are waiting on
1570 // an external script to load, we can't finish parsing until that is done
1572 if (!inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
1573 end(); // this actually causes us to be deleted
// Hands the current token (currToken) to the parser.
// Text accumulated between buffer and dest becomes the token's text, and the tag name
// is set to textAtom unless the token is a comment. While the token is processed the
// JS proxy's event-handler line number is set (tagStartLineno first), and it is reset
// to 0 at the end. The parser is only invoked when parsing has not been stopped; the
// node it yields is stored in n.
// NOTE(review): presumably n is the value returned to the caller -- confirm.
1576 PassRefPtr<Node> HTMLTokenizer::processToken()
// Fragment tokenizers and documents without a frame have no JS proxy (null).
1578 KJSProxy* jsProxy = (!m_fragment && m_doc->frame()) ? m_doc->frame()->jScript() : 0;
1580 jsProxy->setEventHandlerLineno(tagStartLineno);
1581 if (dest > buffer) {
1583 if(currToken.tagName.length()) {
1584 qDebug( "unexpected token: %s, str: *%s*", currToken.tagName.deprecatedString().latin1(),QConstString( buffer,dest-buffer ).deprecatedString().latin1() );
1589 currToken.text = new StringImpl( buffer, dest - buffer );
1590 if (currToken.tagName != commentAtom)
1591 currToken.tagName = textAtom;
1592 } else if (currToken.tagName == nullAtom) {
1595 jsProxy->setEventHandlerLineno(lineno+src.lineCount());
1602 DeprecatedString name = currToken.tagName.deprecatedString();
1603 DeprecatedString text;
1605 text = QConstString(currToken.text->unicode(), currToken.text->length()).deprecatedString();
1607 kdDebug( 6036 ) << "Token --> " << name << endl;
1609 kdDebug( 6036 ) << "Token is FLAT!" << endl;
1611 kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;
1612 unsigned l = currToken.attrs ? currToken.attrs->length() : 0;
1614 kdDebug( 6036 ) << "Attributes: " << l << endl;
1615 for (unsigned i = 0; i < l; ++i) {
1616 Attribute* c = currToken.attrs->attributeItem(i);
1617 kdDebug( 6036 ) << " " << c->localName().deprecatedString()
1618 << "=\"" << c->value().deprecatedString() << "\"" << endl;
1621 kdDebug( 6036 ) << endl;
1626 if (!m_parserStopped)
1627 // pass the token over to the parser, the parser DOES NOT delete the token
1628 n = parser->parseToken(&currToken);
1632 jsProxy->setEventHandlerLineno(0);
// Destructor of the HTML tokenizer.
1637 HTMLTokenizer::~HTMLTokenizer()
// Grows the token buffer so that at least `len` more QChars fit: the new capacity is
// the larger of size*2 and size+len. The buffer is reallocated with fastRealloc, and
// dest is re-pointed at the same offset inside the new allocation, since the buffer
// may have moved.
1645 void HTMLTokenizer::enlargeBuffer(int len)
1647 int newsize = max(size*2, size+len);
1648 int oldoffs = (dest - buffer);
1650 buffer = (QChar*)fastRealloc(buffer, newsize*sizeof(QChar));
1651 dest = buffer + oldoffs;
// Grows the scriptCode buffer so that at least `len` more QChars fit: the new capacity
// is the larger of scriptCodeMaxSize*2 and scriptCodeMaxSize+len. The buffer is
// reallocated with fastRealloc and the new capacity is recorded in scriptCodeMaxSize.
1655 void HTMLTokenizer::enlargeScriptBuffer(int len)
1657 int newsize = max(scriptCodeMaxSize*2, scriptCodeMaxSize+len);
1658 scriptCode = (QChar*)fastRealloc(scriptCode, newsize*sizeof(QChar));
1659 scriptCodeMaxSize = newsize;
// Callback for a cached object that has finished loading (the argument is unused).
// Expects pendingScripts to be non-empty. As long as the script at the head of the
// queue is loaded, it is dequeued, its source and URL are captured, and it is handled:
// errorEvent is dispatched on the saved script node, or the source is run through
// scriptExecution() and loadEvent is dispatched.
// NOTE(review): the choice presumably depends on errorOccurred -- confirm the branch
// structure.
// The loop ends when the queue is empty or its head is not loaded yet.
1662 void HTMLTokenizer::notifyFinished(CachedObject */*finishedObj*/)
1664 #if INSTRUMENT_LAYOUT_SCHEDULING
1665 if (!parser->doc()->ownerElement())
1666 printf("script loaded at %d\n", parser->doc()->elapsedTime());
1669 ASSERT(!pendingScripts.isEmpty());
1670 bool finished = false;
1671 while (!finished && pendingScripts.head()->isLoaded()) {
1673 kdDebug( 6036 ) << "Finished loading an external script" << endl;
1675 CachedScript* cs = pendingScripts.dequeue();
1676 ASSERT(cs->accessCount() > 0);
1678 String scriptSource = cs->script();
1680 kdDebug( 6036 ) << "External script is:" << endl << scriptSource.deprecatedString() << endl;
1682 setSrc(SegmentedString());
1684 // make sure we forget about the script before we execute the new one
1685 // infinite recursion might happen otherwise
1686 DeprecatedString cachedScriptUrl( cs->url().deprecatedString() );
1687 bool errorOccurred = cs->errorOccurred();
// Keep a local reference to the script node for the event dispatch below.
1689 RefPtr<Node> n = scriptNode;
1692 #if INSTRUMENT_LAYOUT_SCHEDULING
1693 if (!parser->doc()->ownerElement())
1694 printf("external script beginning execution at %d\n", parser->doc()->elapsedTime());
1698 EventTargetNodeCast(n.get())->dispatchHTMLEvent(errorEvent, false, false);
1700 m_state = scriptExecution(scriptSource.deprecatedString(), m_state, cachedScriptUrl);
1701 EventTargetNodeCast(n.get())->dispatchHTMLEvent(loadEvent, false, false);
1704 // The state of pendingScripts.isEmpty() can change inside the scriptExecution()
1705 // call above, so test afterwards.
1706 finished = pendingScripts.isEmpty();
1708 m_state.setLoadingExtScript(false);
1709 #if INSTRUMENT_LAYOUT_SCHEDULING
1710 if (!parser->doc()->ownerElement())
1711 printf("external script finished execution at %d\n", parser->doc()->elapsedTime());
1715 // 'inScript' is true when we are called synchronously from
1716 // parseScript(). In that case parseScript() will take care
1717 // of 'scriptOutput'.
1718 if (!m_state.inScript()) {
1719 SegmentedString rest = pendingSrc;
1722 // we might be deleted at this point, do not
1723 // access any members.
// Reports whether the tokenizer state is flagged as loading an external script.
1728 bool HTMLTokenizer::isWaitingForScripts() const
1730 return m_state.loadingExtScript();
// Folds the line count accumulated by the current src into lineno, then resets src's
// line counter so that lines are not counted twice.
// NOTE(review): presumably `source` is also installed as the new src -- confirm.
1733 void HTMLTokenizer::setSrc(const SegmentedString &source)
1735 lineno += src.lineCount();
1737 src.resetLineCount();
// Parses an HTML source string into `fragment` using a temporary HTMLTokenizer.
// The tokenizer is forced to run synchronously, which makes continueProcessing() skip
// its time check, and the whole string is written in one call. The assertion then
// verifies that no timer-driven processing is left outstanding.
1740 void parseHTMLDocumentFragment(const String &source, DocumentFragment *fragment)
1742 HTMLTokenizer tok(fragment);
1743 tok.setForceSynchronous(true);
1744 tok.write(source.deprecatedString(), true);
1746 ASSERT(!tok.processingData()); // make sure we're done (see 3963151)
// Looks up a named entity through findEntity() and returns its code, or 0 when no
// entity is found. `name` must be a NUL-terminated C string, since its length is
// taken with strlen().
1749 unsigned short decodeNamedEntity(const char* name)
1751 const Entity* e = findEntity(name, strlen(name));
1752 return e ? e->code : 0;