2 This file is part of the KDE libraries
4 Copyright (C) 1997 Martin Jones (mjones@kde.org)
5 (C) 1997 Torben Weis (weis@kde.org)
6 (C) 1998 Waldo Bastian (bastian@kde.org)
7 (C) 1999 Lars Knoll (knoll@kde.org)
8 (C) 1999 Antti Koivisto (koivisto@kde.org)
9 (C) 2001 Dirk Mueller (mueller@kde.org)
10 Copyright (C) 2004, 2005, 2006 Apple Computer, Inc.
11 Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
13 This library is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Library General Public
15 License as published by the Free Software Foundation; either
16 version 2 of the License, or (at your option) any later version.
18 This library is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Library General Public License for more details.
23 You should have received a copy of the GNU Library General Public License
24 along with this library; see the file COPYING.LIB. If not, write to
25 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 Boston, MA 02111-1307, USA.
30 #include "HTMLTokenizer.h"
32 #include "CachedScript.h"
33 #include "DocLoader.h"
34 #include "DocumentFragment.h"
35 #include "EventNames.h"
37 #include "HTMLViewSourceDocument.h"
38 #include "HTMLElement.h"
39 #include "SystemTime.h"
40 #include "csshelper.h"
41 #include "HTMLNames.h"
42 #include "HTMLParser.h"
43 #include "kjs_proxy.h"
45 #include "HTMLEntityNames.c"
47 // #define INSTRUMENT_LAYOUT_SCHEDULING 1
49 #define TOKENIZER_CHUNK_SIZE 4096
55 using namespace HTMLNames;
56 using namespace EventNames;
// FIXME: We would like this constant to be 200ms.
// Yielding more aggressively results in increased responsiveness and better incremental rendering.
// It slows down overall page-load on slower machines, though, so for now we set a value of 500.
//
// Maximum time the tokenizer may run before continueProcessing() schedules a
// timer and yields; it is compared against differences of currentTime() values
// (0.500 corresponds to the "500ms" mentioned in the instrumentation message).
const double tokenizerTimeDelay = 0.500;

// Opening sequence of an HTML comment. parseTag() matches it one character at a
// time, using searchCount as the index and 4 as the "fully matched" count.
static const char commentStart [] = "<!--";

// End-tag prefixes for elements whose content is consumed by parseSpecial().
// parseTag() installs one of these as searchStopper together with a hard-coded
// searchStopperLen (8, 5, 7, 10 and 7 respectively), so each literal's length
// must stay in sync with that number. They are written in lower case because
// tagMatch() only upper-cases the pattern side when comparing.
static const char scriptEnd [] = "</script";
static const char xmpEnd [] = "</xmp";
static const char styleEnd [] = "</style";
static const char textareaEnd [] = "</textarea";
static const char titleEnd [] = "</title";
// Full support for MS Windows extensions to Latin-1.
// Technically these extensions should only be activated for pages
// marked "windows-1252" or "cp1252", but
// in the standard Microsoft way, these extensions infect hundreds of thousands
// of web pages.  Note that pages relying on other, non-Latin-1 Microsoft
// extensions are not helped by this table.
//
// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
//      http://www.bbsinc.com/iso8859.html
//      http://www.obviously.com/
//
// There may be better equivalents
//
// We only need this for entities. For non-entity text, we handle this in the text encoding.
//
// The table is indexed by (code - 0x80) and covers exactly 0x80-0x9F. A few
// entries (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to themselves.
static const UChar windowsLatin1ExtensionArray[32] = {
    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
};

// Maps a code unit in the range 0x80-0x9F through the table above and returns
// every other value unchanged.
static inline UChar fixUpChar(UChar c)
{
    // Clearing the low five bits leaves 0x0080 only for 0x80..0x9F, so this
    // single comparison is the range check that keeps the index within 0..31.
    if ((c & ~0x1F) != 0x0080)
        return c;
    return windowsLatin1ExtensionArray[c - 0x80];
}
// Returns true when the first `length` code units of s2 spell the ASCII
// pattern s1, accepting each pattern character either as written or in its
// upper-case form. Only the pattern side is case-folded, so a lower-case
// pattern such as "</script" matches text in any case, whereas an upper-case
// pattern matches upper-case text only. A length of zero always matches.
static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
{
    for (unsigned i = 0; i != length; ++i) {
        unsigned char c1 = s1[i];
        unsigned char uc1 = toupper(c1);
        UChar c2 = s2[i];
        if (c1 != c2 && uc1 != c2)
            return false;
    }
    return true;
}
111 inline void Token::addAttribute(Document* doc, const AtomicString& attrName, const AtomicString& v)
113 if (!attrName.isEmpty() && attrName != "/") {
114 Attribute* a = new MappedAttribute(attrName, v);
116 attrs = new NamedMappedAttrMap(0);
117 attrs->insertAttribute(a);
121 // ----------------------------------------------------------------------------
// Constructs a tokenizer for an HTMLDocument. The visible initializers zero the
// script-buffer bookkeeping (scriptCodeMaxSize, scriptCodeResync), clear the
// script-execution flags, bind m_timer to timerFired() and create an HTMLParser
// for doc.
// NOTE(review): the embedded listing numbers skip values here (124-127, 133 and
// everything after 134), so further initializers and the constructor body are
// missing from this listing.
123 HTMLTokenizer::HTMLTokenizer(HTMLDocument* doc)
128 , scriptCodeMaxSize(0)
129 , scriptCodeResync(0)
130 , m_executingScript(0)
131 , m_requestingScript(false)
132 , m_timer(this, &HTMLTokenizer::timerFired)
134 , parser(new HTMLParser(doc))
// Constructs a tokenizer for an HTMLViewSourceDocument. The visible initializers
// match the HTMLDocument constructor (script-buffer counters zeroed, script flags
// cleared, m_timer bound to timerFired()); no parser initializer is visible here.
// NOTE(review): the embedded listing numbers skip values (142-145, 151 onwards),
// so the remaining initializers and the body are missing from this listing.
141 HTMLTokenizer::HTMLTokenizer(HTMLViewSourceDocument* doc)
146 , scriptCodeMaxSize(0)
147 , scriptCodeResync(0)
148 , m_executingScript(0)
149 , m_requestingScript(false)
150 , m_timer(this, &HTMLTokenizer::timerFired)
// Constructs a tokenizer that parses into a DocumentFragment. m_doc is taken
// from the fragment's owning document, and the HTMLParser is created for the
// fragment in the constructor body rather than in the initializer list.
// NOTE(review): the embedded listing numbers skip values (160-162, 169-171,
// 173 onwards), so some initializers and body lines are missing from this listing.
159 HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag)
163 , scriptCodeMaxSize(0)
164 , scriptCodeResync(0)
165 , m_executingScript(0)
166 , m_requestingScript(false)
167 , m_timer(this, &HTMLTokenizer::timerFired)
168 , m_doc(frag->document())
172 parser = new HTMLParser(frag);
// Returns the tokenizer to an idle state. Must not be called while a script is
// executing (asserted). The visible lines drain the pendingScripts queue, free
// the scriptCode buffer, zero its size/capacity/resync counters, and clear the
// allow-yield and force-synchronous flags.
// NOTE(review): the embedded listing numbers skip values (183-189, 191, 193-194,
// 197 onwards), so several short statements are missing from this listing; what
// is done with each dequeued script is among the lines not shown.
176 void HTMLTokenizer::reset()
178 ASSERT(m_executingScript == 0);
// Drain every script that is still queued.
180 while (!pendingScripts.isEmpty()) {
181 CachedScript *cs = pendingScripts.dequeue();
182 ASSERT(cs->accessCount() > 0);
190 fastFree(scriptCode);
192 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
195 m_state.setAllowYield(false);
196 m_state.setForceSynchronous(false);
// Prepares the tokenizer for a new input stream: clears the script-execution
// and external-script flags, allocates the output buffer (254 UChars), resets
// the entity state, forgets any pending script URL and prepending source, and
// clears the broken-comment / broken-server flags and the script start line.
// NOTE(review): the embedded listing numbers skip values (206-207, 209-211, 214,
// 216, 219, 221), so several short assignments are missing from this listing.
201 void HTMLTokenizer::begin()
203 m_executingScript = 0;
204 m_requestingScript = false;
205 m_state.setLoadingExtScript(false);
208 buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * 254));
212 m_state.setEntityState(NoEntity);
213 scriptSrc = DeprecatedString::null;
215 currentPrependingSrc = 0;
217 brokenComments = false;
218 brokenServer = false;
220 scriptStartLineno = 0;
222 m_state.setForceSynchronous(false);
225 void HTMLTokenizer::setForceSynchronous(bool force)
227 m_state.setForceSynchronous(force);
// Feeds the text in `list` (taken by value, so the caller's string is not
// consumed) into the token stream as preformatted text and returns the updated
// state. The visible lines track line-ending state: skipLF is set when a CR/LF
// pair is being checked for, and discardLF is cleared once a line break has
// been discarded or other text has been seen.
// NOTE(review): the embedded listing numbers skip many values (237-244, 247,
// 249-251, 253, 255-257, 259 onwards); the statements that copy characters and
// emit tokens are missing from this listing.
230 HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state)
232 // This function adds the listing 'list' as
233 // preformatted text-tokens to the token-collection
234 while (!list.isEmpty()) {
235 if (state.skipLF()) {
236 state.setSkipLF(false);
245 if (*list == '\n' || *list == '\r') {
246 if (state.discardLF())
248 state.setDiscardLF(false); // We have discarded 1 LF
252 /* Check for MS-DOS CRLF sequence */
254 state.setSkipLF(true);
258 state.setDiscardLF(false);
// Consumes the raw content of exactly one of <script>, <style>, <textarea>,
// <title> or <xmp> (asserted below) and returns the updated state.
//
// Characters are appended to scriptCode. When the end-tag prefix held in
// searchStopper is matched by tagMatch(), scriptCodeResync records where the
// end tag began; the following unquoted '>' then finishes the element: script
// content goes to scriptHandler(), everything else to processListing(), and
// currToken is set up as the matching end tag for style/textarea/title/xmp.
// A "<!--" in the content switches to parseComment() unless brokenComments is
// set or the element is textarea/xmp/title. Inside textarea/title, '&' starts
// entity parsing through parseEntity().
// NOTE(review): the embedded listing numbers skip values throughout (e.g.
// 279-280, 284-285, 293, 308-309, 317-319, 329, 331, 334-335), so short
// statements such as the definition of `ch`, `else` keywords and closing braces
// are missing from this listing.
267 HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State state)
269 ASSERT(state.inTextArea() || state.inTitle() || !state.hasEntityState());
270 ASSERT(!state.hasTagState());
271 ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() == 1 );
272 if (state.inScript())
273 scriptStartLineno = lineno + src.lineCount();
// Resume a comment that was still open when the previous chunk of input ended.
275 if (state.inComment())
276 state = parseComment(src, state);
278 while ( !src.isEmpty() ) {
// The last three buffered characters are "<!-" and the current one is '-': a comment starts here.
281 if (!scriptCodeResync && !brokenComments && !state.inTextArea() && !state.inXmp() && !state.inTitle() && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && scriptCode[scriptCodeSize-3] == '<' && scriptCode[scriptCodeSize-2] == '!' && scriptCode[scriptCodeSize-1] == '-') {
282 state.setInComment(true);
283 state = parseComment(src, state);
// An end tag was recognised earlier (scriptCodeResync != 0) and its closing '>' has now arrived.
286 if (scriptCodeResync && !tquote && ch == '>') {
// Truncate the buffer to the content that preceded the end tag.
288 scriptCodeSize = scriptCodeResync-1;
289 scriptCodeResync = 0;
290 scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0;
291 if (state.inScript())
292 state = scriptHandler(state);
294 state = processListing(SegmentedString(scriptCode, scriptCodeSize), state);
296 if (state.inStyle()) {
297 currToken.tagName = styleTag.localName();
298 currToken.beginTag = false;
299 } else if (state.inTextArea()) {
300 currToken.tagName = textareaTag.localName();
301 currToken.beginTag = false;
302 } else if (state.inTitle()) {
303 currToken.tagName = titleTag.localName();
304 currToken.beginTag = false;
305 } else if (state.inXmp()) {
306 currToken.tagName = xmpTag.localName();
307 currToken.beginTag = false;
// Leave every special-content mode and reset the collection buffer.
310 state.setInStyle(false);
311 state.setInScript(false);
312 state.setInTextArea(false);
313 state.setInTitle(false);
314 state.setInXmp(false);
316 scriptCodeSize = scriptCodeResync = 0;
320 // possible end of tagname, lets check.
321 if (!scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || ch <= ' ') && ch &&
322 scriptCodeSize >= searchStopperLen &&
323 tagMatch( searchStopper, scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen )) {
// Remember (1-based) where the end tag starts inside scriptCode.
324 scriptCodeResync = scriptCodeSize-searchStopperLen+1;
// While inside the end tag, track quotes so that a quoted '>' does not close it.
328 if (scriptCodeResync && !state.escaped()) {
330 tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
332 tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
333 else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
336 state.setEscaped(!state.escaped() && ch == '\\');
337 if (!scriptCodeResync && (state.inTextArea() || state.inTitle()) && !src.escaped() && ch == '&') {
338 UChar* scriptCodeDest = scriptCode+scriptCodeSize;
340 state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
341 scriptCodeSize = scriptCodeDest-scriptCode;
343 scriptCode[scriptCodeSize++] = *src;
// Called once a complete <script> element has been collected in scriptCode;
// returns the updated state.
//
// Visible behaviour:
//  - Outside view-source mode, when scriptSrc is set and the document has a
//    frame, the external script is requested from the DocLoader and queued in
//    pendingScripts, unless the parser is in skip mode, the body is a
//    <frameset>, or the parser has been stopped. scriptSrc is then cleared.
//  - The collected text is passed through processListing(), and currToken is
//    set up as the </script> end tag.
//  - Input that follows the script is parked (pendingSrc, or the caller's
//    prepending buffer) while an external script is awaited or an inline
//    script is run through scriptExecution(); it is re-queued at the end.
// NOTE(review): the embedded listing numbers skip values throughout (e.g. 369,
// 374-377, 379-380, 384-385, 387-390, 393, 396-397, 402-403, 408, 410, 413, 416,
// 419, 422, 424, 428, 432-433, 436-438, 441, 453, 455-457, 459-462, 464 onwards),
// so short lines such as `else`, `#endif`, `return state;` and the assignments to
// doScriptExec / javascript are missing from this listing.
351 HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state)
353 // We are inside a <script>
354 bool doScriptExec = false;
356 // (Bugzilla 3837) Scripts following a frameset element should not execute or,
357 // in the case of extern scripts, even load.
358 bool followingFrameset = (m_doc->body() && m_doc->body()->hasTagName(framesetTag));
360 CachedScript* cs = 0;
361 // don't load external scripts for standalone documents (for now)
362 if (!inViewSourceMode()) {
363 if (!scriptSrc.isEmpty() && m_doc->frame()) {
364 // forget what we just got; load from src url instead
365 if (!parser->skipMode() && !followingFrameset) {
366 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
367 if (!m_doc->ownerElement())
368 printf("Requesting script at time %d\n", m_doc->elapsedTime());
370 // The parser might have been stopped by for example a window.close call in an earlier script.
371 // If so, we don't want to load scripts.
372 if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(scriptSrc, scriptSrcCharset)))
373 pendingScripts.enqueue(cs);
378 scriptSrc=DeprecatedString::null;
381 kdDebug( 6036 ) << "---START SCRIPT---" << endl;
382 kdDebug( 6036 ) << DeprecatedString(scriptCode, scriptCodeSize) << endl;
383 kdDebug( 6036 ) << "---END SCRIPT---" << endl;
386 // Parse scriptCode containing <script> info
391 state = processListing(SegmentedString(scriptCode, scriptCodeSize), state);
// Snapshot of the output buffer contents, later passed to scriptExecution() as the inline script text.
392 DeprecatedString exScript(reinterpret_cast<DeprecatedChar*>(buffer), dest - buffer);
394 currToken.tagName = scriptTag.localName();
395 currToken.beginTag = false;
398 state.setInScript(false);
400 // FIXME: The script should be syntax highlighted.
401 if (inViewSourceMode())
// Install a local prepending buffer for the duration of this call; the previous
// one is restored before returning.
404 SegmentedString *savedPrependingSrc = currentPrependingSrc;
405 SegmentedString prependingSrc;
406 currentPrependingSrc = &prependingSrc;
407 scriptCodeSize = scriptCodeResync = 0;
409 if (!parser->skipMode() && !followingFrameset) {
411 if (savedPrependingSrc)
412 savedPrependingSrc->append(src);
414 pendingSrc.prepend(src);
415 setSrc(SegmentedString());
417 // the ref() call below may call notifyFinished if the script is already in cache,
418 // and that mucks with the state directly, so we must write it back to the object.
420 bool savedRequestingScript = m_requestingScript;
421 m_requestingScript = true;
423 m_requestingScript = savedRequestingScript;
425 // will be 0 if script was already loaded and ref() executed it
426 if (!pendingScripts.isEmpty())
427 state.setLoadingExtScript(true);
429 else if (!m_fragment && doScriptExec && javascript ) {
430 if (!m_executingScript)
431 pendingSrc.prepend(src);
434 setSrc(SegmentedString());
435 state = scriptExecution(exScript, state, DeprecatedString::null, scriptStartLineno);
// Nothing is running or loading any more: resume tokenizing the parked input.
439 if (!m_executingScript && !state.loadingExtScript()) {
440 src.append(pendingSrc);
442 } else if (!prependingSrc.isEmpty()) {
443 // restore first so that the write appends in the right place
444 // (does not hurt to do it again below)
445 currentPrependingSrc = savedPrependingSrc;
447 // we need to do this slightly modified bit of one of the write() cases
448 // because we want to prepend to pendingSrc rather than appending
449 // if there's no previous prependingSrc
450 if (state.loadingExtScript()) {
451 if (currentPrependingSrc) {
452 currentPrependingSrc->append(prependingSrc);
454 pendingSrc.prepend(prependingSrc);
458 write(prependingSrc, false);
463 currentPrependingSrc = savedPrependingSrc;
// Executes the script text `str` in the document's frame and returns the
// updated state.
//   str       - script source to run
//   scriptURL - URL reported for the script; when null, the URL of the frame's
//               document is used instead
//   baseLine  - line number passed to Frame::executeScript()
// The function guards on fragment tokenizers and on documents without a frame
// (the guarded statement is not visible). After execution it sets the
// allow-yield flag and re-queues parked input in the same way scriptHandler() does.
// NOTE(review): the embedded listing numbers skip values (469, 471-472, 474, 478,
// 482-484, 486-487, 489, 493-496, 499, 511, 513-514, 516-519, 521 onwards), so
// short lines such as `return state;`, `else`, `#endif` and the
// m_executingScript bookkeeping are missing from this listing.
468 HTMLTokenizer::State HTMLTokenizer::scriptExecution(const DeprecatedString& str, State state, DeprecatedString scriptURL, int baseLine)
470 if (m_fragment || !m_doc->frame())
473 DeprecatedString url = scriptURL.isNull() ? m_doc->frame()->document()->URL() : scriptURL;
// Install a local prepending buffer for the duration of the script run.
475 SegmentedString *savedPrependingSrc = currentPrependingSrc;
476 SegmentedString prependingSrc;
477 currentPrependingSrc = &prependingSrc;
479 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
480 if (!m_doc->ownerElement())
481 printf("beginning script execution at %d\n", m_doc->elapsedTime());
485 m_doc->frame()->executeScript(url,baseLine,0,str);
488 state.setAllowYield(true);
490 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
491 if (!m_doc->ownerElement())
492 printf("ending script execution at %d\n", m_doc->elapsedTime());
497 if (!m_executingScript && !state.loadingExtScript()) {
498 src.append(pendingSrc);
500 } else if (!prependingSrc.isEmpty()) {
501 // restore first so that the write appends in the right place
502 // (does not hurt to do it again below)
503 currentPrependingSrc = savedPrependingSrc;
505 // we need to do this slightly modified bit of one of the write() cases
506 // because we want to prepend to pendingSrc rather than appending
507 // if there's no previous prependingSrc
508 if (state.loadingExtScript()) {
509 if (currentPrependingSrc)
510 currentPrependingSrc->append(prependingSrc);
512 pendingSrc.prepend(prependingSrc);
515 write(prependingSrc, false);
520 currentPrependingSrc = savedPrependingSrc;
// Copies comment text from src into scriptCode until the comment terminator is
// seen, then clears the in-comment state and returns.
//
// endCharsCount starts at 1 for the closing '>' and is raised (on lines not
// visible here) when the '>' is preceded by "--" or by the lenient "--!".
// A comment is closed when endCharsCount > 1, or on any '>' when
// brokenComments is set and we are not inside script/style. For comments that
// are not inside script/xmp/textarea/style, the text without its terminator is
// passed to processListing() between a comment begin token and a comment end token.
// NOTE(review): the embedded listing numbers skip values (526, 531-532, 536-537,
// 542-543, 545, 547, 553, 556-558, 561 onwards), so the test for '>', the
// endCharsCount assignments, the src advance and token emission calls are
// missing from this listing.
525 HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString &src, State state)
527 // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
528 checkScriptBuffer(src.length());
529 while ( !src.isEmpty() ) {
530 scriptCode[ scriptCodeSize++ ] = *src;
533 bool handleBrokenComments = brokenComments && !(state.inScript() || state.inStyle());
534 int endCharsCount = 1; // start off with one for the '>' character
535 if (scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' && scriptCode[scriptCodeSize-2] == '-') {
538 else if (scriptCodeSize > 3 && scriptCode[scriptCodeSize-4] == '-' && scriptCode[scriptCodeSize-3] == '-' &&
539 scriptCode[scriptCodeSize-2] == '!') {
540 // Other browsers will accept --!> as a close comment, even though it's
541 // not technically valid.
544 if (handleBrokenComments || endCharsCount > 1) {
546 if (!(state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle())) {
548 scriptCode[scriptCodeSize] = 0;
549 scriptCode[scriptCodeSize + 1] = 0;
550 currToken.tagName = commentAtom;
551 currToken.beginTag = true;
// Emit the comment body without its terminator characters.
552 state = processListing(SegmentedString(scriptCode, scriptCodeSize - endCharsCount), state);
554 currToken.tagName = commentAtom;
555 currToken.beginTag = false;
559 state.setInComment(false);
560 return state; // Finished parsing comment
// Copies a server-side include block from src into scriptCode until its
// terminator is found, then clears the in-server state and returns.
// The visible half of the terminating condition requires the previously
// buffered character to be '%'.
// NOTE(review): the embedded listing numbers skip values (570, 574, 576, 578,
// 580 onwards); the first half of the condition (presumably a test for '>' —
// confirm against the full file), the src advance and the final return are
// missing from this listing.
569 HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state)
571 checkScriptBuffer(src.length());
572 while (!src.isEmpty()) {
573 scriptCode[scriptCodeSize++] = *src;
575 scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {
577 state.setInServer(false);
579 return state; // Finished parsing server include
// Skips over a processing instruction. Quote characters toggle tquote so that
// a '>' inside a quoted string does not end the instruction; an unquoted '>',
// or any '>' directly preceded by '?', ends it. On exit the in-processing-
// instruction state is cleared and discardLF is set.
// NOTE(review): the embedded listing numbers skip values (587-588, 591, 595,
// 601, 604 onwards), so the declaration of `oldchar`, the single-quote test,
// the src advance and the final return are missing from this listing.
586 HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString &src, State state)
589 while (!src.isEmpty()) {
590 UChar chbegin = *src;
592 tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
593 else if (chbegin == '\"')
594 tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
596 // Some crappy sites omit the "?" before it, so
597 // we look for an unquoted '>' instead. (IE compatible)
598 else if (chbegin == '>' && (!tquote || oldchar == '?')) {
599 // We got a '?>' sequence
600 state.setInProcessingInstruction(false);
602 state.setDiscardLF(true);
603 return state; // Finished parsing comment!
// Processes plain text from src and returns the updated state. The only
// behaviour visible here is the skipLF bookkeeping (cleared at the top of an
// iteration, set again further down) and a comment about growing the buffer.
// NOTE(review): most of this function is missing from the listing — the
// embedded numbers skip 613, 615-616, 619-624, 626-628 and 630 onwards — so the
// character-copying logic cannot be described from this excerpt.
612 HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString &src, State state)
614 while (!src.isEmpty()) {
617 if (state.skipLF()) {
618 state.setSkipLF(false);
625 // do we need to enlarge the buffer?
629 state.setSkipLF(true);
// Incremental parser for character references ("&...;"), driven by
// state.entityState() so that it can resume when input arrives in pieces.
//   src        - input; decoded characters are pushed back onto it
//   dest       - output cursor; the raw text is copied here when the entity is
//                rejected (and in view-source mode)
//   cBufferPos - number of raw entity characters collected in cBuffer
//   start      - when true, parsing begins afresh (state set to SearchEntity,
//                EntityUnicodeValue cleared)
//   parsingTag - true inside attribute values; a named entity with a value
//                above 255 is then only accepted when followed by ';'
//
// Visible flow: SearchEntity chooses NumericSearch or EntityName; NumericSearch
// chooses Hexadecimal (after 'x'/'X'), Decimal, or gives up; the digit/name
// states accumulate into EntityUnicodeValue / cBuffer with length limits of
// 10 (hex) and 9 (decimal, name) buffered characters; named entities are
// resolved with findEntity(). In SearchSemicolon, a value in 1..0x10FFFF is
// accepted: values up to 0xFFFF go through fixUpChar(), larger ones are pushed
// as a UTF-16 lead/trail pair. Anything else is emitted as plain text.
// NOTE(review): the embedded listing numbers skip values throughout (e.g.
// 641-644, 647-648, 650-651, 653, 655-658, 660, 662-663, 665-668, 675, 677-679,
// 681-682, 685-690, 694-695, 698-701), so `case` labels, `break;`, `else` and
// brace lines are missing from this listing.
640 HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& dest, State state, unsigned &cBufferPos, bool start, bool parsingTag)
645 state.setEntityState(SearchEntity);
646 EntityUnicodeValue = 0;
649 while(!src.isEmpty())
652 switch(state.entityState()) {
654 ASSERT(state.entityState() != NoEntity);
659 cBuffer[cBufferPos++] = cc;
661 state.setEntityState(NumericSearch);
664 state.setEntityState(EntityName);
669 if (cc == 'x' || cc == 'X') {
670 cBuffer[cBufferPos++] = cc;
672 state.setEntityState(Hexadecimal);
673 } else if (cc >= '0' && cc <= '9')
674 state.setEntityState(Decimal);
676 state.setEntityState(SearchSemicolon);
// Hexadecimal digits: at most 10 characters are buffered in total.
680 int ll = min(src.length(), 10 - cBufferPos);
683 if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
684 state.setEntityState(SearchSemicolon);
691 digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
692 EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
693 cBuffer[cBufferPos++] = cc;
696 if (cBufferPos == 10)
697 state.setEntityState(SearchSemicolon);
// Decimal digits: at most 9 characters are buffered in total.
702 int ll = min(src.length(), 9-cBufferPos);
706 if (!(cc >= '0' && cc <= '9')) {
707 state.setEntityState(SearchSemicolon);
711 EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
712 cBuffer[cBufferPos++] = cc;
716 state.setEntityState(SearchSemicolon);
// Named entity: collect ASCII letters and digits, then look the name up.
721 int ll = min(src.length(), 9-cBufferPos);
725 if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
726 state.setEntityState(SearchSemicolon);
730 cBuffer[cBufferPos++] = cc;
734 state.setEntityState(SearchSemicolon);
735 if (state.entityState() == SearchSemicolon) {
737 const Entity *e = findEntity(cBuffer, cBufferPos);
739 EntityUnicodeValue = e->code;
// Inside attribute values, named entities above Latin-1 require the terminating ';'.
742 if(parsingTag && EntityUnicodeValue > 255 && *src != ';')
743 EntityUnicodeValue = 0;
749 case SearchSemicolon:
750 // Don't allow values that are more than 21 bits.
751 if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
752 if (!inViewSourceMode()) {
755 if (EntityUnicodeValue <= 0xFFFF) {
757 src.push(fixUpChar(EntityUnicodeValue));
759 // Convert to UTF-16, using surrogate code points.
761 src.push(U16_LEAD(EntityUnicodeValue));
762 src.push(U16_TRAIL(EntityUnicodeValue));
765 // FIXME: We should eventually colorize entities by sending them as a special token.
768 for (unsigned i = 0; i < cBufferPos; i++)
769 dest[i] = cBuffer[i];
778 // ignore the sequence, add it to the buffer as plaintext
780 for (unsigned i = 0; i < cBufferPos; i++)
781 dest[i] = cBuffer[i];
785 state.setEntityState(NoEntity);
// Tag state machine: consumes characters of a tag from src according to
// state.tagState() and returns the updated state. It can return with the tag
// unfinished when src runs dry; cBufferPos is written back to m_cBufferPos on
// the visible exit paths so that parsing can resume later.
//
// Visible stages:
//  - tag name: detects the "<!--" comment opener through searchCount and hands
//    over to parseComment(); otherwise collects the name in cBuffer
//    (lower-cased outside view-source mode) and fills currToken
//  - SearchAttribute / attribute name / SearchEqual / value search /
//    QuotedValue / unquoted value: collect attributes and add them with
//    currToken.addAttribute(); '&' inside values goes through parseEntity()
//  - SearchEnd: scans to '>' or '<', then finishes the token: for <script> the
//    src/charset/type/language attributes are examined, the token is processed,
//    and the tokenizer enters the special content mode for script, style,
//    textarea, title, xmp, or updates the select / plaintext flags
// NOTE(review): the embedded listing numbers skip values throughout this
// function, so short lines (several `case` labels, `break;`, `else`, `++src;`,
// braces, comment delimiters) are missing from this listing. Sections whose
// `case` label is missing are identified below from their debug strings or
// from the setTagState() calls that lead to them.
793 HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
795 ASSERT(!state.hasEntityState());
797 unsigned cBufferPos = m_cBufferPos;
799 while (!src.isEmpty())
802 switch(state.tagState()) {
805 m_cBufferPos = cBufferPos;
810 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
// Comment detection: searchCount counts how many characters of commentStart have matched so far.
815 if (*src == commentStart[searchCount])
818 if (searchCount == 4)
821 kdDebug( 6036 ) << "Found comment" << endl;
823 // Found '<!--' sequence
825 dest = buffer; // ignore the previous part of this tag
826 state.setInComment(true);
827 state.setTagState(NoTag);
829 // Fix bug 34302 at kde.bugs.org. Go ahead and treat
830 // <!--> as a valid comment, since both mozilla and IE on windows
831 // can handle this case. Only do this in quirks mode. -dwh
832 if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) {
833 state.setInComment(false);
836 // cuts off high bits, which is okay
837 cBuffer[cBufferPos++] = *src;
840 state = parseComment(src, state);
842 m_cBufferPos = cBufferPos;
843 return state; // Finished parsing tag!
845 // cuts off high bits, which is okay
846 cBuffer[cBufferPos++] = *src;
851 searchCount = 0; // Stop looking for '<!--' sequence
// Collect the tag name in cBuffer until whitespace or '>' (or until the buffer is full).
855 unsigned int ll = min(src.length(), CBUFLEN-cBufferPos);
857 UChar curchar = *src;
858 if(curchar <= ' ' || curchar == '>' ) {
863 // tolower() shows up on profiles. This is faster!
864 if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
865 cBuffer[cBufferPos++] = curchar + ('a' - 'A');
867 cBuffer[cBufferPos++] = curchar;
871 // Disadvantage: we add the possible rest of the tag
872 // as attribute names. ### judge if this causes problems
873 if(finish || CBUFLEN == cBufferPos) {
876 unsigned int len = cBufferPos;
877 cBuffer[cBufferPos] = '\0';
878 if ((cBufferPos > 0) && (*ptr == '/')) {
888 // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/".
889 if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
892 // Now that we've shaved off any invalid / that might have followed the name), make the tag.
893 // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
895 currToken.tagName = AtomicString(ptr);
896 currToken.beginTag = beginTag;
899 state.setTagState(SearchAttribute);
904 case SearchAttribute:
905 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
906 qDebug("SearchAttribute");
908 while(!src.isEmpty()) {
909 UChar curchar = *src;
910 // In this mode just ignore any quotes we encounter and treat them like spaces.
911 if (curchar > ' ' && curchar != '\'' && curchar != '"') {
912 if (curchar == '<' || curchar == '>')
913 state.setTagState(SearchEnd);
915 state.setTagState(AttributeName);
// Attribute-name section (its case label is missing from the listing; see the debug string below).
925 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
926 qDebug("AttributeName");
928 int ll = min(src.length(), CBUFLEN-cBufferPos);
930 UChar curchar = *src;
931 // If we encounter a "/" when scanning an attribute name, treat it as a delimiter. However, we only do
932 // this if we have a non-empty attribute name. This allows the degenerate case of <input type=checkbox checked/>
933 // to work (despite it being utterly invalid).
934 if (curchar <= '>' && (curchar >= '=' || curchar <= ' ' || (curchar == '/' && attrName.length() > 0))) {
935 cBuffer[cBufferPos] = '\0';
936 attrName = AtomicString(cBuffer);
939 state.setTagState(SearchEqual);
940 // This is a deliberate quirk to match Mozilla and Opera. We have to do this
941 // since sites that use the "standards-compliant" path sometimes send
942 // <script src="foo.js"/>. Both Moz and Opera will honor this, despite it
943 // being bogus HTML. They do not honor the "/" for other tags. This behavior
944 // also deviates from WinIE, but in this case we'll just copy Moz and Opera.
945 if (currToken.tagName == scriptTag && curchar == '>' && attrName == "/")
946 currToken.flat = true;
950 // tolower() shows up on profiles. This is faster!
951 if (curchar >= 'A' && curchar <= 'Z')
952 cBuffer[cBufferPos++] = curchar + ('a' - 'A');
954 cBuffer[cBufferPos++] = curchar;
// The name buffer is full: use what has been collected as the attribute name.
957 if ( cBufferPos == CBUFLEN ) {
958 cBuffer[cBufferPos] = '\0';
959 attrName = AtomicString(cBuffer);
962 state.setTagState(SearchEqual);
// Search-for-'=' section (its case label is missing from the listing; see the debug string below).
967 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
968 qDebug("SearchEqual");
970 while(!src.isEmpty()) {
971 UChar curchar = *src;
972 // In this mode just ignore any quotes we encounter and treat them like spaces.
973 if (curchar > ' ' && curchar != '\'' && curchar != '"') {
976 kdDebug(6036) << "found equal" << endl;
978 state.setTagState(SearchValue);
// No value follows: record the attribute with an empty value.
982 currToken.addAttribute(m_doc, attrName, emptyAtom);
984 state.setTagState(SearchAttribute);
// Value search: a quote character selects QuotedValue (remembering which quote in tquote);
// otherwise the Value state is selected.
992 while(!src.isEmpty()) {
993 UChar curchar = *src;
995 if(( curchar == '\'' || curchar == '\"' )) {
996 tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
997 state.setTagState(QuotedValue);
1000 state.setTagState(Value);
// Quoted-value section (its case label is missing from the listing; see the debug string below).
1008 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1009 qDebug("QuotedValue");
1011 while(!src.isEmpty()) {
1014 UChar curchar = *src;
1015 if (curchar == '>' && attrName.isEmpty()) {
1016 // Handle a case like <img '>. Just go ahead and be willing
1017 // to close the whole tag. Don't consume the character and
1018 // just go back into SearchEnd while ignoring the whole
1020 // FIXME: Note that this is actually not a very good solution. It's
1021 // an interim hack and doesn't handle the general case of
1022 // unmatched quotes among attributes that have names. -dwh
1023 while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
1024 dest--; // remove trailing newlines
1025 AtomicString v(buffer+1, dest-buffer-1);
1026 attrName = v; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
1027 currToken.addAttribute(m_doc, attrName, v);
1028 state.setTagState(SearchAttribute);
1034 if(curchar <= '\'' && !src.escaped()) {
1035 // ### attributes like '&{blaa....};' are supposed to be treated as jscript.
1036 if ( curchar == '&' )
1039 state = parseEntity(src, dest, state, cBufferPos, true, true);
1042 else if ( (tquote == SingleQuote && curchar == '\'') ||
1043 (tquote == DoubleQuote && curchar == '\"') )
1045 // some <input type=hidden> rely on trailing spaces. argh
1046 while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
1047 dest--; // remove trailing newlines
1048 AtomicString v(buffer+1, dest-buffer-1);
1049 if (attrName.isEmpty())
1050 attrName = v; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
1051 currToken.addAttribute(m_doc, attrName, v);
1054 state.setTagState(SearchAttribute);
// Unquoted-value section (its case label is missing from the listing).
1065 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1068 while(!src.isEmpty()) {
1070 UChar curchar = *src;
1071 if(curchar <= '>' && !src.escaped()) {
1073 if ( curchar == '&' )
1076 state = parseEntity(src, dest, state, cBufferPos, true, true);
1079 // no quotes. Every space means end of value
1080 // '/' does not delimit in IE!
1081 if ( curchar <= ' ' || curchar == '>' )
1083 AtomicString v(buffer+1, dest-buffer-1);
1084 currToken.addAttribute(m_doc, attrName, v);
1086 state.setTagState(SearchAttribute);
// Search-for-end section (its case label is missing from the listing; see the debug string below).
1097 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1098 qDebug("SearchEnd");
1100 while(!src.isEmpty()) {
1101 if (*src == '>' || *src == '<')
1105 currToken.flat = true;
1109 if (src.isEmpty()) break;
1111 searchCount = 0; // Stop looking for '<!--' sequence
1112 state.setTagState(NoTag);
1118 if (currToken.tagName == nullAtom) { //stop if tag is unknown
1119 m_cBufferPos = cBufferPos;
1123 AtomicString tagName = currToken.tagName;
1124 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0
1125 kdDebug( 6036 ) << "appending Tag: " << tagName.deprecatedString() << endl;
1128 // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
1130 bool isSelfClosingScript = currToken.flat && currToken.beginTag && currToken.tagName == scriptTag;
1131 bool beginTag = !currToken.flat && currToken.beginTag;
// For an opening <script>, work out the external source, its charset and whether
// the declared type/language is acceptable.
1132 if (currToken.beginTag && currToken.tagName == scriptTag) {
1134 bool foundTypeAttribute = false;
1135 scriptSrc = DeprecatedString::null;
1136 scriptSrcCharset = String();
1137 if ( currToken.attrs && /* potentially have a ATTR_SRC ? */
1139 m_doc->frame()->jScriptEnabled() && /* jscript allowed at all? */
1140 !m_fragment /* are we a regular tokenizer or just for innerHTML ? */
1142 if ((a = currToken.attrs->getAttributeItem(srcAttr)))
1143 scriptSrc = m_doc->completeURL(parseURL(a->value()).deprecatedString());
1144 if ((a = currToken.attrs->getAttributeItem(charsetAttr)))
1145 scriptSrcCharset = a->value().domString().stripWhiteSpace();
1146 if (scriptSrcCharset.isEmpty())
1147 scriptSrcCharset = m_doc->frame()->encoding();
1148 /* Check type before language, since language is deprecated */
1149 if ((a = currToken.attrs->getAttributeItem(typeAttr)) != 0 && !a->value().isEmpty())
1150 foundTypeAttribute = true;
1152 a = currToken.attrs->getAttributeItem(languageAttr);
1156 if( foundTypeAttribute ) {
1158 Mozilla 1.5 accepts application/x-javascript, and some web references claim it is the only
1159 correct variation, but WinIE 6 doesn't accept it.
1160 Neither Mozilla 1.5 nor WinIE 6 accept application/javascript, application/ecmascript, or
1161 application/x-ecmascript.
1162 Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does.
1163 Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does.
1164 Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't.
1165 Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string.
1166 We want to accept all the values that either of these browsers accept, but not other values.
1168 DeprecatedString type = a->value().domString().stripWhiteSpace().lower().deprecatedString();
1169 if( type.compare("application/x-javascript") != 0 &&
1170 type.compare("text/javascript") != 0 &&
1171 type.compare("text/javascript1.0") != 0 &&
1172 type.compare("text/javascript1.1") != 0 &&
1173 type.compare("text/javascript1.2") != 0 &&
1174 type.compare("text/javascript1.3") != 0 &&
1175 type.compare("text/javascript1.4") != 0 &&
1176 type.compare("text/javascript1.5") != 0 &&
1177 type.compare("text/jscript") != 0 &&
1178 type.compare("text/ecmascript") != 0 &&
1179 type.compare("text/livescript") )
1183 Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does.
1184 Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3.
1185 Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace.
1186 We want to accept all the values that either of these browsers accept, but not other values.
1188 String lang = a->value().domString().lower();
1190 lang != "javascript" &&
1191 lang != "javascript1.0" &&
1192 lang != "javascript1.1" &&
1193 lang != "javascript1.2" &&
1194 lang != "javascript1.3" &&
1195 lang != "javascript1.4" &&
1196 lang != "javascript1.5" &&
1197 lang != "ecmascript" &&
1198 lang != "livescript" &&
// Hand the finished token over, then enter the special content mode (if any)
// for the element that was just opened.
1204 RefPtr<Node> n = processToken();
1206 if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
1208 state.setDiscardLF(true); // Discard the first LF after we open a pre.
1209 } else if (tagName == scriptTag) {
1210 ASSERT(!scriptNode);
1213 searchStopper = scriptEnd;
1214 searchStopperLen = 8;
1215 state.setInScript(true);
1216 state = parseSpecial(src, state);
1217 } else if (isSelfClosingScript) { // Handle <script src="foo"/>
1218 state.setInScript(true);
1219 state = scriptHandler(state);
1221 } else if (tagName == styleTag) {
1223 searchStopper = styleEnd;
1224 searchStopperLen = 7;
1225 state.setInStyle(true);
1226 state = parseSpecial(src, state);
1228 } else if (tagName == textareaTag) {
1230 searchStopper = textareaEnd;
1231 searchStopperLen = 10;
1232 state.setInTextArea(true);
1233 state = parseSpecial(src, state);
1235 } else if (tagName == titleTag) {
1237 searchStopper = titleEnd;
1238 searchStopperLen = 7;
// Snapshot state, input and line number so that title parsing can be undone (see below).
1239 State savedState = state;
1240 SegmentedString savedSrc = src;
1241 long savedLineno = lineno;
1242 state.setInTitle(true);
1243 state = parseSpecial(src, state);
1244 if (state.inTitle() && src.isEmpty()) {
1245 // We just ate the rest of the document as the title #text node!
1246 // Reset the state then retokenize without special title handling.
1247 // Let the parser clean up the missing </title> tag.
1248 // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
1249 // at the end of the document unless noMoreData is also true. We need
1250 // to detect this case elsewhere, and save the state somewhere other
1251 // than a local variable.
1254 lineno = savedLineno;
1258 } else if (tagName == xmpTag) {
1260 searchStopper = xmpEnd;
1261 searchStopperLen = 5;
1262 state.setInXmp(true);
1263 state = parseSpecial(src, state);
1265 } else if (tagName == selectTag)
1266 state.setInSelect(beginTag);
1267 else if (tagName == plaintextTag)
1268 state.setInPlainText(beginTag);
1269 m_cBufferPos = cBufferPos;
1270 return state; // Finished parsing tag!
1274 m_cBufferPos = cBufferPos;
// Decides whether the tokenizer loop should keep consuming input or yield to
// the event loop by arming m_timer.
//   processedCount - caller's running count, compared against TOKENIZER_CHUNK_SIZE.
//   startTime      - timestamp (from currentTime()) taken when the caller began.
//   state          - tokenizer state; its allow-yield flag is read and then cleared.
// NOTE(review): brace-only and other short lines are absent from this excerpt,
// so the return statements (and any update of processedCount) are not visible.
1278 inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state)
1280 // We don't want to be checking elapsed time with every character, so we only check after we've
1281 // processed a certain number of characters.
// Remember whether a yield had been explicitly allowed, then clear the flag.
1282 bool allowedYield = state.allowYield();
1283 state.setAllowYield(false);
// Yielding is only considered when no external script is loading, synchronous
// mode is not forced, no script is executing, and either the chunk budget was
// exceeded or a yield had been allowed.
1284 if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > TOKENIZER_CHUNK_SIZE || allowedYield)) {
// Only yield once more than tokenizerTimeDelay has elapsed since startTime.
1286 if (currentTime() - startTime > tokenizerTimeDelay) {
1287 /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
1288 load, but this hurts overall performance on slower machines. For now turn this
1290 || (!m_doc->haveStylesheetsLoaded() &&
1291 (m_doc->documentElement()->id() != ID_HTML || m_doc->body()))) {*/
1292 // Schedule the timer to keep processing as soon as possible.
1293 m_timer.startOneShot(0);
1294 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1295 if (currentTime() - startTime > tokenizerTimeDelay)
1296 printf("Deferring processing of data because 500ms elapsed away from event loop.\n");
// Main tokenizer entry point: takes the input in str and runs the tokenizer
// state machine over src.
//   str        - the incoming text.
//   appendData - consulted together with m_executingScript to decide whether
//                the input must be queued instead of tokenized right away.
// Returns a bool; timerFired() in this file treats a true result as "end() was
// called during the write".
// NOTE(review): brace-only and other short lines are absent from this excerpt
// (returns, buffer handling, several case labels), so the comments below
// describe only the statements that are visible.
1306 bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
1309 kdDebug( 6036 ) << this << " Tokenizer::write(\"" << str.toString() << "\"," << appendData << ")" << endl;
// Check whether parsing has been stopped (the guarded statement is not visible here).
1315 if (m_parserStopped)
// While a script is executing and this is appended data, or while external
// scripts are still pending, the input is queued rather than tokenized: onto
// currentPrependingSrc when one is set, otherwise (presumably) onto pendingSrc.
1318 if ( ( m_executingScript && appendData ) || !pendingScripts.isEmpty() ) {
1319 // don't parse; we will do this later
1320 if (currentPrependingSrc) {
1321 currentPrependingSrc->append(str);
1323 pendingSrc.append(str);
1333 // Once a timer is set, it has control of when the tokenizer continues.
1334 if (m_timer.isActive())
// Save the inWrite flag; it is restored after the loop below.
1337 bool wasInWrite = inWrite;
1340 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1341 if (!m_doc->ownerElement())
1342 printf("Beginning write at time %d\n", m_doc->elapsedTime());
// Bookkeeping handed to continueProcessing() to decide when to yield.
1345 int processedCount = 0;
1346 double startTime = currentTime();
1348 Frame *frame = m_doc->frame();
// Work on a local copy of the tokenizer state.
1350 State state = m_state;
// Tokenize until src is exhausted or, when there is a frame, a scheduled
// location change is pending on it.
1352 while (!src.isEmpty() && (!frame || !frame->isScheduledLocationChangePending())) {
// Ask whether to keep going (the statement taken on "no" is not visible here).
1353 if (!continueProcessing(processedCount, startTime, state))
1356 // do we need to enlarge the buffer?
// Read and clear the skip-LF flag before examining the current character.
1361 bool wasSkipLF = state.skipLF();
1363 state.setSkipLF(false);
1365 if (wasSkipLF && (cc == '\n'))
// Otherwise dispatch to the handler matching whichever state flag is set.
1367 else if (state.needsSpecialWriteHandling()) {
1368 // it's important to keep needsSpecialWriteHandling with the flags this block tests
1369 if (state.hasEntityState())
1370 state = parseEntity(src, dest, state, m_cBufferPos, false, state.hasTagState());
1371 else if (state.inPlainText())
1372 state = parseText(src, state);
1373 else if (state.inAnySpecial())
1374 state = parseSpecial(src, state);
1375 else if (state.inComment())
1376 state = parseComment(src, state);
1377 else if (state.inServer())
1378 state = parseServer(src, state);
1379 else if (state.inProcessingInstruction())
1380 state = parseProcessingInstruction(src, state);
1381 else if (state.hasTagState())
1382 state = parseTag(src, state);
// A '<' was seen on a previous iteration: clear the flag and classify what follows.
1383 else if (state.startTag()) {
1384 state.setStartTag(false);
1391 searchCount = 1; // Look for '<!--' sequence to start comment
1396 // xml processing instruction
1397 state.setInProcessingInstruction(true);
1399 state = parseProcessingInstruction(src, state);
1405 if (!brokenServer) {
1406 // <% server stuff, handle as comment %>
1407 state.setInServer(true);
1409 state = parseServer(src, state);
1412 // else fall through
1414 if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
1415 // Start of a Start-Tag
1429 state.setTagState(TagName);
1430 state = parseTag(src, state);
// An unescaped '&' is handed to the entity parser.
1432 } else if (cc == '&' && !src.escaped()) {
1434 state = parseEntity(src, dest, state, m_cBufferPos, true, state.hasTagState());
// An unescaped '<' records the line on which the tag starts and sets the
// start-tag flag for the next iteration.
1435 } else if (cc == '<' && !src.escaped()) {
1436 tagStartLineno = lineno+src.lineCount();
1438 state.setStartTag(true);
// Line-ending handling; uses the discard-LF and skip-LF flags.
1439 } else if (cc == '\n' || cc == '\r') {
1440 if (state.discardLF())
1442 state.setDiscardLF(false); // We have discarded 1 LF
1447 /* Check for MS-DOS CRLF sequence */
1449 state.setSkipLF(true);
1452 state.setDiscardLF(false);
1458 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1459 if (!m_doc->ownerElement())
1460 printf("Ending write at time %d\n", m_doc->elapsedTime());
// Restore the inWrite flag saved before the loop.
1463 inWrite = wasInWrite;
// With no more data expected and nothing outstanding (no enclosing write, no
// external script loading, no script executing, no timer armed), finish up.
1467 if (noMoreData && !inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) {
1468 end(); // this actually causes us to be deleted
// Stops parsing: delegates to Tokenizer::stopParsing(), then tells the frame
// that the tokenizer is done with its data.
// NOTE(review): a line is absent between the base-class call and the comment
// below in this excerpt; confirm against the full file.
1474 void HTMLTokenizer::stopParsing()
1476 Tokenizer::stopParsing();
1479 // The part needs to know that the tokenizer has finished with its data,
1480 // regardless of whether it happened naturally or due to manual intervention.
// The notification is skipped when m_fragment is set or the document has no frame.
1481 if (!m_fragment && m_doc->frame())
1482 m_doc->frame()->tokenizerProcessedData();
// Reports whether the tokenizer's timer is currently active. In this file the
// timer is armed with startOneShot(0) by continueProcessing() and timerFired().
1485 bool HTMLTokenizer::processingData() const
1487 return m_timer.isActive();
// Timer callback: resumes tokenizing by calling write() with an empty
// SegmentedString, then notifies the frame if that write reported that end()
// was called.
// NOTE(review): short lines (braces, any early return) are absent from this excerpt.
1490 void HTMLTokenizer::timerFired(Timer<HTMLTokenizer>*)
1492 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1493 if (!m_doc->ownerElement())
1494 printf("Beginning timer write at time %d\n", m_doc->elapsedTime());
// A pending layout with no minimum layout delay takes precedence: re-arm the
// timer with a zero delay.
1497 if (m_doc->view() && m_doc->view()->layoutPending() && !m_doc->minimumLayoutDelay()) {
1498 // Restart the timer and let layout win. This is basically a way of ensuring that the layout
1499 // timer has higher priority than our timer.
1500 m_timer.startOneShot(0);
// Take a reference to the frame before writing; no frame is used when
// m_fragment is set. NOTE(review): presumably this keeps the frame usable even
// if write() calls end(), which comments in this file say can delete the tokenizer.
1504 RefPtr<Frame> frame = m_fragment ? 0 : m_doc->frame();
1506 // Invoke write() as though more data came in.
1507 bool didCallEnd = write(SegmentedString(), true);
1509 // If we called end() during the write, we need to let WebKit know that we're done processing the data.
1510 if (didCallEnd && frame)
1511 frame->tokenizerProcessedData();
// Completes tokenizing: stops the timer, releases the script buffer and zeroes
// its size bookkeeping, and signals completion (m_doc->finishedParsing() is
// among the visible calls). Callers in this file note that calling end() can
// cause the tokenizer to be deleted.
// NOTE(review): the statements guarded by the two `if` lines below are absent
// from this excerpt, so which calls are conditional cannot be determined here.
1514 void HTMLTokenizer::end()
1516 ASSERT(!m_timer.isActive());
1517 m_timer.stop(); // Only helps if assertion above fires, but do it anyway.
1520 // parseTag is using the buffer for different matters
1521 if (!m_state.hasTagState())
// Release the script buffer and reset its size, capacity and resync values.
1524 fastFree(scriptCode);
1526 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
1532 if (!inViewSourceMode())
1535 m_doc->finishedParsing();
// Called when no more input will arrive (see the comment near the end of this
// function). While the state is still inside an unterminated comment or server
// block and buffered text exists in scriptCode, it records the breakage in
// brokenComments / brokenServer, copies buffered text into `food`, frees the
// buffer and clears the comment/server flags. Finally it calls end() unless a
// write is in progress, an external script is loading, a script is executing,
// or the timer is active.
// NOTE(review): short lines (declarations of `pos`/`food`, else branches, the
// statement guarded by the food.isEmpty() check) are absent from this excerpt.
1538 void HTMLTokenizer::finish()
1540 // do this as long as we don't find matching comment ends
1541 while((m_state.inComment() || m_state.inServer()) && scriptCode && scriptCodeSize) {
1542 // we've found an unmatched comment start
1543 if (m_state.inComment())
1544 brokenComments = true;
1546 brokenServer = true;
// Make sure the buffer has room, then write two zero terminators after the text.
1547 checkScriptBuffer();
1548 scriptCode[scriptCodeSize] = 0;
1549 scriptCode[scriptCodeSize + 1] = 0;
// Choose which part of the buffered text is carried over in `food`.
1552 if (m_state.inScript() || m_state.inStyle())
1553 food = String(scriptCode, scriptCodeSize);
1554 else if (m_state.inServer()) {
1556 food.append(String(scriptCode, scriptCodeSize));
// Here only the text after the first '>' in the buffer is kept.
1558 pos = DeprecatedConstString(reinterpret_cast<DeprecatedChar*>(scriptCode), scriptCodeSize).string().find('>');
1559 food = String(scriptCode + pos + 1, scriptCodeSize - pos - 1);
// Release the buffer and leave the comment/server states.
1561 fastFree(scriptCode);
1563 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
1564 m_state.setInComment(false);
1565 m_state.setInServer(false);
1566 if (!food.isEmpty())
1569 // this indicates we will not receive any more data... but if we are waiting on
1570 // an external script to load, we can't finish parsing until that is done
1572 if (!inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
1573 end(); // this actually causes us to be deleted
// Finalizes currToken and hands it on. Any text accumulated in `buffer`
// (dest > buffer) becomes the token's text; when parsing has not been stopped
// the token is given to the view-source document and/or parser->parseToken().
// The script proxy's event-handler line number is set around the hand-off.
// NOTE(review): short lines are absent from this excerpt (null guards around
// the jsProxy calls, debug preprocessor guards, the else between the two
// hand-off paths, the declaration of `n`, and the return statement).
1576 PassRefPtr<Node> HTMLTokenizer::processToken()
// jsProxy is 0 when m_fragment is set or the document has no frame.
1578 KJSProxy* jsProxy = (!m_fragment && m_doc->frame()) ? m_doc->frame()->jScript() : 0;
1580 jsProxy->setEventHandlerLineno(tagStartLineno);
// Text is pending in the buffer.
1581 if (dest > buffer) {
1583 if(currToken.tagName.length()) {
1584 qDebug( "unexpected token: %s, str: *%s*", currToken.tagName.deprecatedString().latin1(),DeprecatedConstString( buffer,dest-buffer ).deprecatedString().latin1() );
// Attach the buffered characters to the token; anything that is not a comment
// token is tagged as text.
1589 currToken.text = new StringImpl( buffer, dest - buffer );
1590 if (currToken.tagName != commentAtom)
1591 currToken.tagName = textAtom;
// No pending text and no tag name.
1592 } else if (currToken.tagName == nullAtom) {
1595 jsProxy->setEventHandlerLineno(lineno+src.lineCount());
// Debug output describing the token: name, text, and attributes.
1602 DeprecatedString name = currToken.tagName.deprecatedString();
1603 DeprecatedString text;
1605 text = DeprecatedConstString(currToken.text->unicode(), currToken.text->length()).deprecatedString();
1607 kdDebug( 6036 ) << "Token --> " << name << endl;
1609 kdDebug( 6036 ) << "Token is FLAT!" << endl;
1611 kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;
1612 unsigned l = currToken.attrs ? currToken.attrs->length() : 0;
1614 kdDebug( 6036 ) << "Attributes: " << l << endl;
1615 for (unsigned i = 0; i < l; ++i) {
1616 Attribute* c = currToken.attrs->attributeItem(i);
1617 kdDebug( 6036 ) << " " << c->localName().deprecatedString()
1618 << "=\"" << c->value().deprecatedString() << "\"" << endl;
1621 kdDebug( 6036 ) << endl;
// Tokens are only handed on while the parser has not been stopped.
1626 if (!m_parserStopped) {
1627 if (inViewSourceMode())
1628 static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceToken(&currToken);
1630 // pass the token over to the parser, the parser DOES NOT delete the token
1631 n = parser->parseToken(&currToken);
// Reset the event-handler line number to 0 after the hand-off.
1635 jsProxy->setEventHandlerLineno(0);
// Destructor.
// NOTE(review): the body lines are not present in this excerpt.
1640 HTMLTokenizer::~HTMLTokenizer()
// Grows the token buffer. The new capacity is the larger of double the current
// size and the current size plus len, counted in UChar elements.
// NOTE(review): a line is absent after the last visible statement in this
// excerpt (presumably where `size` is updated); confirm against the full file.
1648 void HTMLTokenizer::enlargeBuffer(int len)
1650 int newSize = max(size * 2, size + len);
// Remember how far dest is into the buffer, since the reallocation result is
// assigned back to `buffer` and dest must be re-based onto it.
1651 int oldOffset = dest - buffer;
1652 buffer = static_cast<UChar*>(fastRealloc(buffer, newSize * sizeof(UChar)));
1653 dest = buffer + oldOffset;
// Grows the scriptCode buffer. The new capacity is the larger of double
// scriptCodeMaxSize and scriptCodeMaxSize plus len, counted in UChar elements;
// the new capacity is then stored in scriptCodeMaxSize.
1657 void HTMLTokenizer::enlargeScriptBuffer(int len)
1659 int newSize = max(scriptCodeMaxSize * 2, scriptCodeMaxSize + len);
1660 scriptCode = static_cast<UChar*>(fastRealloc(scriptCode, newSize * sizeof(UChar)));
1661 scriptCodeMaxSize = newSize;
// Callback invoked with a CachedResource* (unused here). Drains pendingScripts
// from the head for as long as the head script reports isLoaded(): each script
// is dequeued, its source, URL and error flag are captured, and then an error
// event is dispatched and/or the script is executed and a load event is
// dispatched on the script node.
// NOTE(review): short lines are absent from this excerpt (the branch choosing
// between the error and execute paths, the guard around clearing the
// loading-external-script flag, and the statements that resume parsing with
// `rest`).
1664 void HTMLTokenizer::notifyFinished(CachedResource*)
1666 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1667 if (!m_doc->ownerElement())
1668 printf("script loaded at %d\n", m_doc->elapsedTime());
1671 ASSERT(!pendingScripts.isEmpty());
1672 bool finished = false;
// Process scripts in queue order; stop at the first one that is not loaded yet.
1673 while (!finished && pendingScripts.head()->isLoaded()) {
1675 kdDebug( 6036 ) << "Finished loading an external script" << endl;
1677 CachedScript* cs = pendingScripts.dequeue();
1678 ASSERT(cs->accessCount() > 0);
1680 String scriptSource = cs->script();
1682 kdDebug( 6036 ) << "External script is:" << endl << scriptSource.deprecatedString() << endl;
// Replace the current source with an empty one before running the script.
1684 setSrc(SegmentedString());
1686 // make sure we forget about the script before we execute the new one
1687 // infinite recursion might happen otherwise
// Copy what is needed from the cached script into locals.
1688 DeprecatedString cachedScriptUrl( cs->url().deprecatedString() );
1689 bool errorOccurred = cs->errorOccurred();
// Hold the script node in a local RefPtr for the event dispatches below.
1691 RefPtr<Node> n = scriptNode;
1694 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1695 if (!m_doc->ownerElement())
1696 printf("external script beginning execution at %d\n", m_doc->elapsedTime());
1700 EventTargetNodeCast(n.get())->dispatchHTMLEvent(errorEvent, true, false);
1702 m_state = scriptExecution(scriptSource.deprecatedString(), m_state, cachedScriptUrl);
1703 EventTargetNodeCast(n.get())->dispatchHTMLEvent(loadEvent, false, false);
1706 // The state of pendingScripts.isEmpty() can change inside the scriptExecution()
1707 // call above, so test afterwards.
1708 finished = pendingScripts.isEmpty();
1710 m_state.setLoadingExtScript(false);
1711 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1712 if (!m_doc->ownerElement())
1713 printf("external script finished execution at %d\n", m_doc->elapsedTime());
1717 // 'm_requestingScript' is true when we are called synchronously from
1718 // scriptHandler(). In that case scriptHandler() will take care
// Otherwise take a copy of the queued source in pendingSrc.
1720 if (!m_requestingScript) {
1721 SegmentedString rest = pendingSrc;
1724 // we might be deleted at this point, do not
1725 // access any members.
// Reports whether the tokenizer state has its loading-external-script flag set.
1730 bool HTMLTokenizer::isWaitingForScripts() const
1732 return m_state.loadingExtScript();
// Switches the tokenizer's input while keeping line accounting intact: the
// line count of the current src is folded into lineno, and src's own line
// counter is then reset.
// NOTE(review): a line is absent between the two statements in this excerpt
// (presumably where `source` is stored into src); confirm against the full file.
1735 void HTMLTokenizer::setSrc(const SegmentedString &source)
1737 lineno += src.lineCount();
1739 src.resetLineCount();
// Parses `source` into `fragment` using a temporary HTMLTokenizer that is
// forced into synchronous mode (continueProcessing() does not yield when
// forceSynchronous() is set).
// NOTE(review): a line is absent between the write() call and the ASSERT in
// this excerpt; confirm against the full file.
1742 void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment)
1744 HTMLTokenizer tok(fragment);
1745 tok.setForceSynchronous(true);
1746 tok.write(source, true);
// processingData() reports whether the tokenizer's timer is still active.
1748 ASSERT(!tok.processingData()); // make sure we're done (see 3963151)
// Looks up `name` with findEntity() and returns the matching entry's code, or
// 0 when no entry is found. `name` is passed to strlen(), so it must be a
// non-null, NUL-terminated string.
1751 UChar decodeNamedEntity(const char* name)
1753 const Entity* e = findEntity(name, strlen(name));
1754 return e ? e->code : 0;