2 This file is part of the KDE libraries
4 Copyright (C) 1997 Martin Jones (mjones@kde.org)
5 (C) 1997 Torben Weis (weis@kde.org)
6 (C) 1998 Waldo Bastian (bastian@kde.org)
7 (C) 1999 Lars Knoll (knoll@kde.org)
8 (C) 1999 Antti Koivisto (koivisto@kde.org)
9 (C) 2001 Dirk Mueller (mueller@kde.org)
10 Copyright (C) 2004 Apple Computer, Inc.
12 This library is free software; you can redistribute it and/or
13 modify it under the terms of the GNU Library General Public
14 License as published by the Free Software Foundation; either
15 version 2 of the License, or (at your option) any later version.
17 This library is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 Library General Public License for more details.
22 You should have received a copy of the GNU Library General Public License
23 along with this library; see the file COPYING.LIB. If not, write to
24 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 Boston, MA 02111-1307, USA.
27 //----------------------------------------------------------------------------
29 // KDE HTML Widget - Tokenizers
31 //#define TOKEN_DEBUG 1
32 //#define TOKEN_DEBUG 2
39 #include "html/htmltokenizer.h"
40 #include "html/html_documentimpl.h"
41 #include "html/htmlparser.h"
44 #include "misc/loader.h"
45 #include "misc/htmlhashes.h"
47 #include "khtmlview.h"
48 #include "khtml_part.h"
49 #include "xml/dom_docimpl.h"
50 #include "css/csshelper.h"
51 #include "ecma/kjs_proxy.h"
52 #include <kcharsets.h>
60 using DOM::AtomicString;
61 using DOM::AttributeImpl;
63 using DOM::DOMStringImpl;
64 using DOM::DocumentImpl;
70 // turn off inlining to avoid a warning with newer gcc
73 #include "kentities.c"
76 // #define INSTRUMENT_LAYOUT_SCHEDULING 1
78 #define TOKENIZER_CHUNK_SIZE 4096
80 // FIXME: We would like this constant to be 200ms. Yielding more aggressively results in increased
81 // responsiveness and better incremental rendering. It slows down overall page-load on slower machines,
82 // though, so for now we set a value of 500.
83 #define TOKENIZER_TIME_DELAY 500
// Literal markers the tokenizer scans for.
// commentStart is compared one character at a time (indexed by searchCount)
// while a tag is being parsed. The *End strings are installed as
// searchStopper, together with their character count in searchStopperLen,
// when the matching element is opened. They deliberately omit the closing
// '>' because the character that follows the name is checked separately.
87 static const char commentStart [] = "<!--";
88 static const char scriptEnd [] = "</script";
89 static const char xmpEnd [] = "</xmp";
90 static const char styleEnd [] = "</style";
91 static const char textareaEnd [] = "</textarea";
92 static const char titleEnd [] = "</title";
// Raw-memory helpers for QChar buffers; thin wrappers over malloc/realloc/free.
// No constructors or destructors are run on the storage.

// Allocates uninitialized room for N QChars; yields a QChar* (null on failure).
#define KHTML_ALLOC_QCHAR_VEC( N ) (QChar*) malloc( sizeof(QChar)*( N ) )

// Resizes the buffer held in the pointer variable P to N QChars, stores the
// result back into P and also yields it, so both `MACRO(buf, n);` and
// `buf = MACRO(buf, n);` work.
// The previous definition passed an unrelated identifier `p` to realloc and
// assigned through a cast, which is not valid standard C++.
// NOTE: as with any `p = realloc(p, ...)`, a failed realloc leaves P null and
// the old storage unreachable; callers that must survive allocation failure
// should call realloc directly.
#define KHTML_REALLOC_QCHAR_VEC(P, N ) ((P) = (QChar*) realloc((P), sizeof(QChar)*( N )))

// Releases a buffer obtained from the two macros above.
#define KHTML_DELETE_QCHAR_VEC( P ) free((char*)( P ))
98 // Full support for MS Windows extensions to Latin-1.
99 // Technically these extensions should only be activated for pages
100 // marked "windows-1252" or "cp1252", but
101 // in the standard Microsoft way, these extensions infect hundreds of thousands
102 // of web pages. Note that people with non-latin-1 Microsoft extensions
105 // See: http://www.microsoft.com/globaldev/reference/WinCP.asp
106 // http://www.bbsinc.com/iso8859.html
107 // http://www.obviously.com/
109 // There may be better equivalents
113 // Note that we have more Unicode characters than Qt, so we use the
114 // official mapping table from the Unicode 2.0 standard here instead of
115 // one with hacks to avoid certain Unicode characters. Also, we don't
116 // need the unrelated hacks to avoid Unicode characters that are in the
119 // We need this for entities at least. For non-entity text, we could
120 // handle this in the text codec.
122 // To cover non-entity text, I think this function would need to be called
123 // in more places. There seem to be many places that don't call fixUpChar.
// Rewrites c in place when its code point is one of the values listed below
// (all in the 0x80-0x9F range), replacing it with the Unicode character that
// the Windows Latin-1 extensions assign to that value.
// The visible cases have no entries for 0x81, 0x8D, 0x8F, 0x90 and 0x9D.
125 inline void fixUpChar(QChar& c) {
126 switch (c.unicode()) {
127 case 0x0080: c = 0x20AC; break; // EURO SIGN
129 case 0x0082: c = 0x201A; break; // SINGLE LOW-9 QUOTATION MARK
130 case 0x0083: c = 0x0192; break; // LATIN SMALL LETTER F WITH HOOK
131 case 0x0084: c = 0x201E; break; // DOUBLE LOW-9 QUOTATION MARK
132 case 0x0085: c = 0x2026; break; // HORIZONTAL ELLIPSIS
133 case 0x0086: c = 0x2020; break; // DAGGER
134 case 0x0087: c = 0x2021; break; // DOUBLE DAGGER
135 case 0x0088: c = 0x02C6; break; // MODIFIER LETTER CIRCUMFLEX ACCENT
136 case 0x0089: c = 0x2030; break; // PER MILLE SIGN
137 case 0x008A: c = 0x0160; break; // LATIN CAPITAL LETTER S WITH CARON
138 case 0x008B: c = 0x2039; break; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
139 case 0x008C: c = 0x0152; break; // LATIN CAPITAL LIGATURE OE
141 case 0x008E: c = 0x017D; break; // LATIN CAPITAL LETTER Z WITH CARON
144 case 0x0091: c = 0x2018; break; // LEFT SINGLE QUOTATION MARK
145 case 0x0092: c = 0x2019; break; // RIGHT SINGLE QUOTATION MARK
146 case 0x0093: c = 0x201C; break; // LEFT DOUBLE QUOTATION MARK
147 case 0x0094: c = 0x201D; break; // RIGHT DOUBLE QUOTATION MARK
148 case 0x0095: c = 0x2022; break; // BULLET
149 case 0x0096: c = 0x2013; break; // EN DASH
150 case 0x0097: c = 0x2014; break; // EM DASH
151 case 0x0098: c = 0x02DC; break; // SMALL TILDE
152 case 0x0099: c = 0x2122; break; // TRADE MARK SIGN
153 case 0x009A: c = 0x0161; break; // LATIN SMALL LETTER S WITH CARON
154 case 0x009B: c = 0x203A; break; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
155 case 0x009C: c = 0x0153; break; // LATIN SMALL LIGATURE OE
157 case 0x009E: c = 0x017E; break; // LATIN SMALL LETTER Z WITH CARON
158 case 0x009F: c = 0x0178; break; // LATIN CAPITAL LETTER Y WITH DIAERESIS
162 #else // APPLE_CHANGES
164 #define fixUpChar(x) \
166 switch ((x).cell()) \
168 /* ALL of these should be changed to Unicode SOON */ \
169 case 0x80: (x) = 0x20ac; break; \
170 case 0x82: (x) = ','; break; \
171 case 0x83: (x) = 0x0192; break; \
172 case 0x84: (x) = '"'; break; \
173 case 0x85: (x) = 0x2026; break; \
174 case 0x86: (x) = 0x2020; break; \
175 case 0x87: (x) = 0x2021; break; \
176 case 0x88: (x) = 0x02C6; break; \
177 case 0x89: (x) = 0x2030; break; \
178 case 0x8A: (x) = 0x0160; break; \
179 case 0x8b: (x) = '<'; break; \
180 case 0x8C: (x) = 0x0152; break; \
182 case 0x8E: (x) = 0x017D; break; \
185 case 0x91: (x) = '\''; break; \
186 case 0x92: (x) = '\''; break; \
187 case 0x93: (x) = '"'; break; \
188 case 0x94: (x) = '"'; break; \
189 case 0x95: (x) = '*'; break; \
190 case 0x96: (x) = '-'; break; \
191 case 0x97: (x) = '-'; break; \
192 case 0x98: (x) = '~'; break; \
193 case 0x99: (x) = 0x2122; break; \
194 case 0x9A: (x) = 0x0161; break; \
195 case 0x9b: (x) = '>'; break; \
196 case 0x9C: (x) = 0x0153; break; \
198 case 0x9E: (x) = 0x017E; break; \
199 case 0x9F: (x) = 0x0178; break; \
200 /* This one should die */ \
201 case 0xb7: (x) = '*'; break; \
206 /* These should all die sooner rather than later */ \
207 switch( (x).unicode() ) { \
208 case 0x2013: (x) = '-'; break; \
209 case 0x2014: (x) = '-'; break; \
210 case 0x2018: (x) = '\''; break; \
211 case 0x2019: (x) = '\''; break; \
212 case 0x201c: (x) = '"'; break; \
213 case 0x201d: (x) = '"'; break; \
214 case 0x2022: (x) = '*'; break; \
215 case 0x2122: (x) = 0x2122; break; \
220 #endif // APPLE_CHANGES
// Compares the first `length` characters of the 8-bit string s1 against the
// QChar string s2, position by position.
// A position is treated as a mismatch only when the s2 character equals
// neither c1 nor toupper(c1), so a lowercase s1 matches either case in s2.
// NOTE(review): the declarations of c1/c2 and the return statements are not
// in the visible lines; presumably c1 = s1[i] and c2 = s2[i] -- confirm.
222 inline bool tagMatch(const char *s1, const QChar *s2, uint length)
224 for (uint i = 0; i != length; ++i) {
226 char uc1 = toupper(c1);
228 if (c1 != c2 && uc1 != c2)
234 // ----------------------------------------------------------------------------
// Constructs a tokenizer that feeds a KHTMLParser built for a view.
//   _doc             - document pointer forwarded to the parser.
//   _view            - view forwarded to the parser.
//   includesComments - forwarded to the parser and remembered in
//                      includesCommentsInDOM (read later by parseComment).
// Other member initialisation is in lines not visible here.
236 HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view, bool includesComments)
// The script buffer starts out empty with no resync position recorded.
244 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
245 charsets = KGlobal::charsets();
246 parser = new KHTMLParser(_view, _doc, includesComments);
// No script is running and no external script is being waited for.
247 m_executingScript = 0;
248 loadingExtScript = false;
250 attrNamePresent = false;
252 includesCommentsInDOM = includesComments;
// Constructs a tokenizer that feeds a KHTMLParser built for a document
// fragment rather than a view.
//   _doc             - document pointer forwarded to the parser.
//   i                - target fragment forwarded to the parser.
//   includesComments - forwarded to the parser and remembered in
//                      includesCommentsInDOM.
// Other member initialisation is in lines not visible here.
257 HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, DOM::DocumentFragmentImpl *i, bool includesComments)
// The script buffer starts out empty with no resync position recorded.
265 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
266 charsets = KGlobal::charsets();
267 parser = new KHTMLParser(i, _doc, includesComments);
// No script is running and no external script is being waited for.
268 m_executingScript = 0;
269 loadingExtScript = false;
272 includesCommentsInDOM = includesComments;
// Releases the tokenizer's buffers and drops outstanding script references.
// Must not be called while a script is executing or while the tokenizer is
// on hold; both conditions are asserted.
277 void HTMLTokenizer::reset()
279 assert(m_executingScript == 0);
280 assert(onHold == false);
// Drain the queue of cached scripts, releasing this tokenizer's reference
// on each one.
282 while (!cachedScript.isEmpty())
283 cachedScript.dequeue()->deref(this);
// Free the token text buffer and the script accumulation buffer.
286 KHTML_DELETE_QCHAR_VEC(buffer);
291 KHTML_DELETE_QCHAR_VEC(scriptCode);
// Zero the script buffer bookkeeping to match the freed storage.
293 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
// Puts the tokenizer into its initial state before input is written:
// allocates the token text buffer and clears the parsing flags visible below.
// Further state is reset in lines not visible here.
305 void HTMLTokenizer::begin()
307 m_executingScript = 0;
308 loadingExtScript = false;
// Initial token buffer of 255 QChars.
312 buffer = KHTML_ALLOC_QCHAR_VEC( 255 );
// No whitespace is pending and nothing is scheduled to be discarded.
315 pending = NonePending;
316 discard = NoneDiscard;
321 processingInstruction = false;
335 loadingExtScript = false;
336 scriptSrc = QString::null;
// No write redirection target is active.
338 currentPrependingSrc = 0;
340 brokenComments = false;
341 brokenServer = false;
343 scriptStartLineno = 0;
// Consumes `list` character by character and emits it as preformatted text.
// Newlines, carriage returns, spaces and tabs are classified separately from
// ordinary characters; spaces and tabs are recorded as pending whitespace
// rather than written immediately.
// `list` is taken by value, so the caller's string is not consumed.
347 void HTMLTokenizer::processListing(TokenizerString list)
350 // This function adds the listing 'list' as
351 // preformatted text-tokens to the token-collection
352 // thereby converting TABs.
// Outside of <style> content, switch on preformatted handling.
353 if(!style) pre = true;
356 while ( !list.isEmpty() )
// Make sure the output buffer has room before each character is handled.
358 checkBuffer(3*TAB_SIZE);
// A line feed was expected to be skipped but the next character is not one.
360 if (skipLF && ( *list != '\n' ))
370 else if (( *list == '\n' ) || ( *list == '\r' ))
372 if (discard == LFDiscard)
375 discard = NoneDiscard; // We have discarded 1 LF
384 /* Check for MS-DOS CRLF sequence */
391 else if (( *list == ' ' ) || ( *list == '\t'))
// Whitespace is deferred: remember which kind is pending.
396 pending = SpacePending;
398 pending = TabPending;
404 discard = NoneDiscard;
// Accumulates the raw content of a <script>, <style>, <xmp>, <textarea> or
// <title> element into scriptCode until the matching end tag (searchStopper)
// is found, then emits the collected text and the close-tag token.
// Exactly one of the five element flags must be set on entry (asserted).
//   src - input being consumed.
423 void HTMLTokenizer::parseSpecial(TokenizerString &src)
// An entity may only be in progress inside <textarea> or <title>.
425 assert( textarea || title || !Entity );
427 assert( xmp+textarea+title+style+script == 1 );
429 scriptStartLineno = lineno+src.lineCount();
// Resume a comment that was already in progress.
431 if ( comment ) parseComment( src );
433 while ( !src.isEmpty() ) {
435 unsigned char ch = src->latin1();
// The buffer ends in "<!-" and the current character is '-': a "<!--"
// sequence inside script/style content (excluded for textarea, xmp, title).
436 if ( !scriptCodeResync && !brokenComments && !textarea && !xmp && !title && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && scriptCode[scriptCodeSize-3] == '<' && scriptCode[scriptCodeSize-2] == '!' && scriptCode[scriptCodeSize-1] == '-' ) {
// A candidate end tag was seen earlier and its unquoted '>' has now arrived.
441 if ( scriptCodeResync && !tquote && ( ch == '>' ) ) {
// Cut the buffer back to just before the end tag and terminate it.
443 scriptCodeSize = scriptCodeResync-1;
444 scriptCodeResync = 0;
445 scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0;
449 processListing(TokenizerString(scriptCode, scriptCodeSize));
// Choose the close-tag token id matching the element being left.
451 if ( style ) { currToken.id = ID_STYLE + ID_CLOSE_TAG; }
452 else if ( textarea ) { currToken.id = ID_TEXTAREA + ID_CLOSE_TAG; }
453 else if ( title ) { currToken.id = ID_TITLE + ID_CLOSE_TAG; }
454 else if ( xmp ) { currToken.id = ID_XMP + ID_CLOSE_TAG; }
// NOTE(review): `style` appears twice in this chain; the duplicate is redundant.
456 style = script = style = textarea = title = xmp = false;
458 scriptCodeSize = scriptCodeResync = 0;
462 // possible end of tagname, lets check.
// The current character could terminate a tag name and the buffer tail
// matches searchStopper: remember where the end tag starts.
463 if ( !scriptCodeResync && !escaped && !src.escaped() && ( ch == '>' || ch == '/' || ch <= ' ' ) && ch &&
464 scriptCodeSize >= searchStopperLen &&
465 tagMatch( searchStopper, scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen )) {
466 scriptCodeResync = scriptCodeSize-searchStopperLen+1;
// While inside the candidate end tag, track quoting so that a '>' inside a
// quoted attribute value does not close the tag.
470 if ( scriptCodeResync && !escaped ) {
472 tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
474 tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
475 else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
// A backslash escapes the next character; an escaped backslash does not.
478 escaped = ( !escaped && ch == '\\' );
// In <textarea>/<title>, '&' starts an entity decoded straight into scriptCode.
479 if (!scriptCodeResync && (textarea||title) && !src.escaped() && ch == '&') {
480 QChar *scriptCodeDest = scriptCode+scriptCodeSize;
482 parseEntity(src,scriptCodeDest,true);
483 scriptCodeSize = scriptCodeDest-scriptCode;
// Ordinary character: store it and apply the Windows Latin-1 fix-up.
486 scriptCode[scriptCodeSize] = *src;
487 fixUpChar(scriptCode[scriptCodeSize]);
494 void HTMLTokenizer::scriptHandler()
496 // We are inside a <script>
497 bool doScriptExec = false;
498 CachedScript* cs = 0;
499 // don't load external scripts for standalone documents (for now)
500 if (!scriptSrc.isEmpty() && parser->doc()->part()) {
501 // forget what we just got; load from src url instead
502 if ( !parser->skipMode() ) {
503 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
504 if (!parser->doc()->ownerElement())
505 printf("Requesting script at time %d\n", parser->doc()->elapsedTime());
507 if ( (cs = parser->doc()->docLoader()->requestScript(scriptSrc, scriptSrcCharset) ))
508 cachedScript.enqueue(cs);
510 scriptSrc=QString::null;
514 kdDebug( 6036 ) << "---START SCRIPT---" << endl;
515 kdDebug( 6036 ) << QString(scriptCode, scriptCodeSize) << endl;
516 kdDebug( 6036 ) << "---END SCRIPT---" << endl;
518 // Parse scriptCode containing <script> info
521 processListing(TokenizerString(scriptCode, scriptCodeSize));
522 QString exScript( buffer, dest-buffer );
524 currToken.id = ID_SCRIPT + ID_CLOSE_TAG;
527 TokenizerString *savedPrependingSrc = currentPrependingSrc;
528 TokenizerString prependingSrc;
529 currentPrependingSrc = &prependingSrc;
530 if ( !parser->skipMode() ) {
532 //kdDebug( 6036 ) << "cachedscript extern!" << endl;
533 //kdDebug( 6036 ) << "src: *" << QString( src.current(), src.length() ).latin1() << "*" << endl;
534 //kdDebug( 6036 ) << "pending: *" << pendingSrc.latin1() << "*" << endl;
535 if (savedPrependingSrc) {
536 savedPrependingSrc->append(src);
538 pendingSrc.prepend(src);
540 setSrc(TokenizerString());
541 scriptCodeSize = scriptCodeResync = 0;
543 // will be 0 if script was already loaded and ref() executed it
544 if (!cachedScript.isEmpty())
545 loadingExtScript = true;
547 else if (view && doScriptExec && javascript ) {
548 if (!m_executingScript)
549 pendingSrc.prepend(src);
552 setSrc(TokenizerString());
553 scriptCodeSize = scriptCodeResync = 0;
556 scriptExecution( exScript, QString::null, scriptStartLineno );
557 //kdDebug( 6036 ) << "script execution time:" << dt.elapsed() << endl;
562 scriptCodeSize = scriptCodeResync = 0;
564 if ( !m_executingScript && !loadingExtScript ) {
565 // kdDebug( 6036 ) << "adding pending Output to parsed string" << endl;
566 src.append(pendingSrc);
568 } else if (!prependingSrc.isEmpty()) {
569 // restore first so that the write appends in the right place
570 // (does not hurt to do it again below)
571 currentPrependingSrc = savedPrependingSrc;
573 // we need to do this slightly modified bit of one of the write() cases
574 // because we want to prepend to pendingSrc rather than appending
575 // if there's no previous prependingSrc
576 if (loadingExtScript) {
577 if (currentPrependingSrc) {
578 currentPrependingSrc->append(prependingSrc);
580 pendingSrc.prepend(prependingSrc);
583 write(prependingSrc, false);
587 currentPrependingSrc = savedPrependingSrc;
// Runs a script through the owning part, then merges any input written to
// the tokenizer while the script ran back into the token stream.
//   str       - script source text to execute.
//   scriptURL - URL passed to executeScript; when null, the URL of the
//               part's document is used instead.
// The rest of the parameter list is in lines not visible here.
// Does nothing useful without a view that has a part (guarded below).
590 void HTMLTokenizer::scriptExecution( const QString& str, QString scriptURL,
594 if (!view || !view->part())
597 bool oldscript = script;
601 if (scriptURL.isNull())
602 url = static_cast<DocumentImpl*>(view->part()->document().handle())->URL();
// Redirect writes made during execution into a local string, remembering
// the previous redirection target so it can be restored afterwards.
606 TokenizerString *savedPrependingSrc = currentPrependingSrc;
607 TokenizerString prependingSrc;
608 currentPrependingSrc = &prependingSrc;
610 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
611 if (!parser->doc()->ownerElement())
612 printf("beginning script execution at %d\n", parser->doc()->elapsedTime());
615 view->part()->executeScript(url,baseLine,Node(),str);
619 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
620 if (!parser->doc()->ownerElement())
621 printf("ending script execution at %d\n", parser->doc()->elapsedTime());
// No script is running or loading any more: queue the deferred input for
// normal parsing.
627 if ( !m_executingScript && !loadingExtScript ) {
628 // kdDebug( 6036 ) << "adding pending Output to parsed string" << endl;
629 src.append(pendingSrc);
631 } else if (!prependingSrc.isEmpty()) {
632 // restore first so that the write appends in the right place
633 // (does not hurt to do it again below)
634 currentPrependingSrc = savedPrependingSrc;
636 // we need to do this slightly modified bit of one of the write() cases
637 // because we want to prepend to pendingSrc rather than appending
638 // if there's no previous prependingSrc
639 if (loadingExtScript) {
640 if (currentPrependingSrc) {
641 currentPrependingSrc->append(prependingSrc);
643 pendingSrc.prepend(prependingSrc);
646 write(prependingSrc, false);
// Always leave the redirection target as it was on entry.
650 currentPrependingSrc = savedPrependingSrc;
// Copies comment text from src into scriptCode until a comment terminator is
// recognised, then (outside script/xmp/textarea/style content) optionally
// emits the comment as tokens.
//   src - input being consumed.
// Returns as soon as the comment is finished.
653 void HTMLTokenizer::parseComment(TokenizerString &src)
// Reserve room for everything currently available in src.
655 checkScriptBuffer(src.length());
656 while ( !src.isEmpty() ) {
657 scriptCode[ scriptCodeSize++ ] = *src;
658 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
659 qDebug("comment is now: *%s*",
660 QConstString((QChar*)src.current(), QMIN(16, src.length())).string().latin1());
662 if (src->unicode() == '>') {
// In broken-comment mode (and not inside script/style) a bare '>' may end
// the comment.
663 bool handleBrokenComments = brokenComments && !(script || style);
664 int endCharsCount = 1; // start off with one for the '>' character
// Buffer ends in "-->".
665 if (scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' && scriptCode[scriptCodeSize-2] == '-') {
// Buffer ends in "--!>".
668 else if (scriptCodeSize > 3 && scriptCode[scriptCodeSize-4] == '-' && scriptCode[scriptCodeSize-3] == '-' &&
669 scriptCode[scriptCodeSize-2] == '!') {
670 // Other browsers will accept --!> as a close comment, even though it's
671 // not technically valid.
// endCharsCount > 1 means one of the terminators above was recognised.
674 if (handleBrokenComments || endCharsCount > 1) {
676 if (!( script || xmp || textarea || style)) {
677 if (includesCommentsInDOM) {
679 scriptCode[ scriptCodeSize ] = 0;
680 scriptCode[ scriptCodeSize + 1 ] = 0;
681 currToken.id = ID_COMMENT;
// Emit the comment body without its terminator characters.
682 processListing(TokenizerString(scriptCode, scriptCodeSize - endCharsCount));
684 currToken.id = ID_COMMENT + ID_CLOSE_TAG;
690 return; // Finished parsing comment
// Copies a server-side include block from src into scriptCode until the
// closing "%>" sequence is seen.
//   src - input being consumed.
697 void HTMLTokenizer::parseServer(TokenizerString &src)
// Reserve room for everything currently available in src.
699 checkScriptBuffer(src.length());
700 while ( !src.isEmpty() ) {
701 scriptCode[ scriptCodeSize++ ] = *src;
// The character just stored is '>' and the one before it is '%'.
702 if (src->unicode() == '>' &&
703 scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {
707 return; // Finished parsing server include
// Consumes a processing instruction from src, tracking single and double
// quotes so that a '>' inside a quoted string does not end it.
//   src - input being consumed.
// Clears processingInstruction and returns once the end is found.
// NOTE(review): `oldchar` is set in lines not visible here; presumably it
// holds the previously consumed character -- confirm.
713 void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src)
716 while ( !src.isEmpty() )
718 unsigned char chbegin = src->latin1();
// A quote character toggles its own quote state and replaces the other kind.
719 if(chbegin == '\'') {
720 tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
722 else if(chbegin == '\"') {
723 tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
726 // some crappy sites omit the "?" before it, so
727 // we look for an unquoted '>' instead. (IE compatible)
728 else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) )
730 // We got a '?>' sequence
731 processingInstruction = false;
734 return; // Finished parsing the processing instruction
// Consumes plain text from src one character at a time, treating line feeds
// and carriage returns separately from other characters.
//   src - input being consumed.
// Most of the per-character handling is in lines not visible here.
741 void HTMLTokenizer::parseText(TokenizerString &src)
743 while ( !src.isEmpty() )
745 // do we need to enlarge the buffer?
748 // ascii is okay because we only do ascii comparisons
749 unsigned char chbegin = src->latin1();
// A line feed was expected to be skipped but the next character is not one.
751 if (skipLF && ( chbegin != '\n' ))
761 else if (( chbegin == '\n' ) || ( chbegin == '\r' ))
779 void HTMLTokenizer::parseEntity(TokenizerString &src, QChar *&dest, bool start)
784 Entity = SearchEntity;
785 EntityUnicodeValue = 0;
788 while( !src.isEmpty() )
790 ushort cc = src->unicode();
793 assert(Entity != NoEntity);
798 cBuffer[cBufferPos++] = cc;
800 Entity = NumericSearch;
808 if(cc == 'x' || cc == 'X') {
809 cBuffer[cBufferPos++] = cc;
811 Entity = Hexadecimal;
813 else if(cc >= '0' && cc <= '9')
816 Entity = SearchSemicolon;
822 int ll = kMin(src.length(), 8);
824 QChar csrc(src->lower());
827 if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) {
830 EntityUnicodeValue = EntityUnicodeValue*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
831 cBuffer[cBufferPos++] = cc;
834 Entity = SearchSemicolon;
839 int ll = kMin(src.length(), 9-cBufferPos);
843 if(src->row() || !(cc >= '0' && cc <= '9')) {
844 Entity = SearchSemicolon;
848 EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
849 cBuffer[cBufferPos++] = cc;
852 if(cBufferPos == 9) Entity = SearchSemicolon;
857 int ll = kMin(src.length(), 9-cBufferPos);
862 if(csrc.row() || !((cc >= 'a' && cc <= 'z') ||
863 (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
864 Entity = SearchSemicolon;
868 cBuffer[cBufferPos++] = cc;
871 if(cBufferPos == 9) Entity = SearchSemicolon;
872 if(Entity == SearchSemicolon) {
874 const entity *e = findEntity(cBuffer, cBufferPos);
876 EntityUnicodeValue = e->code;
879 if(tag && EntityUnicodeValue > 255 && *src != ';')
880 EntityUnicodeValue = 0;
886 case SearchSemicolon:
888 //kdDebug( 6036 ) << "ENTITY " << EntityUnicodeValue << ", " << res << endl;
890 // Don't allow surrogate code points, or values that are more than 21 bits.
891 if ((EntityUnicodeValue > 0 && EntityUnicodeValue < 0xD800)
892 || (EntityUnicodeValue >= 0xE000 && EntityUnicodeValue <= 0x1FFFFF)) {
897 if (EntityUnicodeValue <= 0xFFFF) {
898 QChar c(EntityUnicodeValue);
903 // Convert to UTF-16, using surrogate code points.
904 QChar c1(0xD800 | (((EntityUnicodeValue >> 16) - 1) << 6) | ((EntityUnicodeValue >> 10) & 0x3F));
905 QChar c2(0xDC00 | (EntityUnicodeValue & 0x3FF));
913 kdDebug( 6036 ) << "unknown entity!" << endl;
916 // ignore the sequence, add it to the buffer as plaintext
918 for(unsigned int i = 0; i < cBufferPos; i++)
919 dest[i] = cBuffer[i];
922 prePos += cBufferPos+1;
931 void HTMLTokenizer::parseTag(TokenizerString &src)
935 while ( !src.isEmpty() )
938 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
940 while(l < src.length() && (*(src.current()+l)).latin1() != '>')
942 qDebug("src is now: *%s*, tquote: %d",
943 QConstString((QChar*)src.current(), l).string().latin1(), tquote);
952 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
957 if (*src == commentStart[searchCount])
960 if (searchCount == 4)
963 kdDebug( 6036 ) << "Found comment" << endl;
965 // Found '<!--' sequence
967 dest = buffer; // ignore the previous part of this tag
971 // Fix bug 34302 at kde.bugs.org. Go ahead and treat
972 // <!--> as a valid comment, since both mozilla and IE on windows
973 // can handle this case. Only do this in quirks mode. -dwh
974 if (!src.isEmpty() && *src == '>' && parser->doc()->inCompatMode()) {
978 cBuffer[cBufferPos++] = src->cell();
983 return; // Finished parsing tag!
985 // cuts off the high byte, which is okay
986 cBuffer[cBufferPos++] = src->cell();
991 searchCount = 0; // Stop looking for '<!--' sequence
995 unsigned int ll = kMin(src.length(), CBUFLEN-cBufferPos);
997 ushort curchar = *src;
998 if(curchar <= ' ' || curchar == '>' ) {
1002 // Use tolower() instead of | 0x20 to lowercase the char because there is no
1003 // performance gain in using | 0x20 since tolower() is optimized and
1004 // | 0x20 turns characters such as '_' into junk.
1005 cBuffer[cBufferPos++] = tolower(curchar);
1009 // Disadvantage: we add the possible rest of the tag
1010 // as attribute names. ### judge if this causes problems
1011 if(finish || CBUFLEN == cBufferPos) {
1013 char* ptr = cBuffer;
1014 unsigned int len = cBufferPos;
1015 cBuffer[cBufferPos] = '\0';
1016 if ((cBufferPos > 0) && (*ptr == '/'))
1027 // Accept empty xml tags like <br/>. We trim off the "/" so that when we call
1028 // getTagID, we'll look up "br" as the tag name and not "br/".
1029 if(len > 1 && ptr[len-1] == '/' )
1032 // Look up the tagID for the specified tag name (now that we've shaved off any
1033 // invalid / that might have followed the name).
1034 uint tagID = getTagID(ptr, len);
1037 QCString tmp(ptr, len+1);
1038 kdDebug( 6036 ) << "Unknown tag: \"" << tmp.data() << "\"" << endl;
1045 QCString tmp(ptr, len+1);
1046 kdDebug( 6036 ) << "found tag id=" << tagID << ": " << tmp.data() << endl;
1048 currToken.id = beginTag ? tagID : tagID + ID_CLOSE_TAG;
1051 tag = SearchAttribute;
1056 case SearchAttribute:
1058 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1059 qDebug("SearchAttribute");
1061 bool atespace = false;
1063 while(!src.isEmpty()) {
1066 if (curchar == '<' || curchar == '>')
1068 else if(atespace && (curchar == '\'' || curchar == '"'))
1072 attrName = QString::null;
1073 attrNamePresent = false;
1076 tag = AttributeName;
1088 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1089 qDebug("AttributeName");
1092 int ll = kMin(src.length(), CBUFLEN-cBufferPos);
1096 if(curchar <= '>') {
1097 if(curchar <= ' ' || curchar == '=' || curchar == '>') {
1099 cBuffer[cBufferPos] = '\0';
1100 a = getAttrID(cBuffer, cBufferPos);
1102 attrNamePresent = true;
1104 attrName = QString::fromLatin1(QCString(cBuffer, cBufferPos+1).data());
1105 attrNamePresent = !attrName.isEmpty();
1107 // This is a deliberate quirk to match Mozilla and Opera. We have to do this
1108 // since sites that use the "standards-compliant" path sometimes send
1109 // <script src="foo.js"/>. Both Moz and Opera will honor this, despite it
1110 // being bogus HTML. They do not honor the "/" for other tags. This behavior
1111 // also deviates from WinIE, but in this case we'll just copy Moz and Opera.
1112 if (currToken.id == ID_SCRIPT && curchar == '>' &&
1114 currToken.flat = true;
1120 if (!a || (cBufferPos && *cBuffer == '!'))
1121 kdDebug( 6036 ) << "Unknown attribute: *" << QCString(cBuffer, cBufferPos+1).data() << "*" << endl;
1123 kdDebug( 6036 ) << "Known attribute: " << QCString(cBuffer, cBufferPos+1).data() << endl;
1130 // Use tolower() instead of | 0x20 to lowercase the char because there is no
1131 // performance gain in using | 0x20 since tolower() is optimized and
1132 // | 0x20 turns characters such as '_' into junk.
1133 cBuffer[cBufferPos++] = tolower(curchar);
1136 if ( cBufferPos == CBUFLEN ) {
1137 cBuffer[cBufferPos] = '\0';
1138 attrName = QString::fromLatin1(QCString(cBuffer, cBufferPos+1).data());
1139 attrNamePresent = !attrName.isEmpty();
1148 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1149 qDebug("SearchEqual");
1152 bool atespace = false;
1153 while(!src.isEmpty()) {
1154 curchar = src->unicode();
1156 if(curchar == '=') {
1158 kdDebug(6036) << "found equal" << endl;
1163 else if(atespace && (curchar == '\'' || curchar == '"'))
1167 attrName = QString::null;
1168 attrNamePresent = false;
1171 currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, emptyAtom);
1173 tag = SearchAttribute;
1185 while(!src.isEmpty()) {
1186 curchar = src->unicode();
1188 if(( curchar == '\'' || curchar == '\"' )) {
1189 tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
1203 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1204 qDebug("QuotedValue");
1207 while(!src.isEmpty()) {
1210 curchar = src->unicode();
1211 if (curchar == '>' && !attrNamePresent) {
1212 // Handle a case like <img '>. Just go ahead and be willing
1213 // to close the whole tag. Don't consume the character and
1214 // just go back into SearchEnd while ignoring the whole
1216 // FIXME: Note that this is actually not a very good solution. It's
1217 // an interim hack and doesn't handle the general case of
1218 // unmatched quotes among attributes that have names. -dwh
1219 while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
1220 dest--; // remove trailing newlines
1221 AtomicString v(buffer+1, dest-buffer-1);
1222 attrName.setUnicode(buffer+1,dest-buffer-1);
1223 currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);
1224 tag = SearchAttribute;
1230 if(curchar <= '\'' && !src.escaped()) {
1231 // ### attributes like '&{blaa....};' are supposed to be treated as jscript.
1232 if ( curchar == '&' )
1235 parseEntity(src, dest, true);
1238 else if ( (tquote == SingleQuote && curchar == '\'') ||
1239 (tquote == DoubleQuote && curchar == '\"') )
1241 // some <input type=hidden> rely on trailing spaces. argh
1242 while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
1243 dest--; // remove trailing newlines
1244 AtomicString v(buffer+1, dest-buffer-1);
1245 if (!attrNamePresent)
1246 attrName.setUnicode(buffer+1,dest-buffer-1);
1247 currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);
1250 tag = SearchAttribute;
1265 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1269 while(!src.isEmpty()) {
1271 curchar = src->unicode();
1272 if(curchar <= '>' && !src.escaped()) {
1274 if ( curchar == '&' )
1277 parseEntity(src, dest, true);
1280 // no quotes. Every space means end of value
1281 // '/' does not delimit in IE!
1282 if ( curchar <= ' ' || curchar == '>' )
1284 AtomicString v(buffer+1, dest-buffer-1);
1285 currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);
1287 tag = SearchAttribute;
1301 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1302 qDebug("SearchEnd");
1304 while(!src.isEmpty()) {
1305 if (*src == '>' || *src == '<')
1309 currToken.flat = true;
1313 if (src.isEmpty()) break;
1315 searchCount = 0; // Stop looking for '<!--' sequence
1322 if ( !currToken.id ) //stop if tag is unknown
1325 uint tagID = currToken.id;
1326 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0
1327 kdDebug( 6036 ) << "appending Tag: " << tagID << endl;
1329 bool beginTag = !currToken.flat && (tagID < ID_CLOSE_TAG);
1331 if (tagID >= ID_CLOSE_TAG)
1332 tagID -= ID_CLOSE_TAG;
1333 else if (tagID == ID_SCRIPT) {
1334 AttributeImpl* a = 0;
1335 bool foundTypeAttribute = false;
1336 scriptSrc = QString::null;
1337 scriptSrcCharset = QString::null;
1338 if ( currToken.attrs && /* potentially have a ATTR_SRC ? */
1339 parser->doc()->part() &&
1340 parser->doc()->part()->jScriptEnabled() && /* jscript allowed at all? */
1341 view /* are we a regular tokenizer or just for innerHTML ? */
1343 if ( ( a = currToken.attrs->getAttributeItem( ATTR_SRC ) ) )
1344 scriptSrc = parser->doc()->completeURL(parseURL( a->value() ).string() );
1345 if ( ( a = currToken.attrs->getAttributeItem( ATTR_CHARSET ) ) )
1346 scriptSrcCharset = a->value().string().stripWhiteSpace();
1347 if ( scriptSrcCharset.isEmpty() )
1348 scriptSrcCharset = parser->doc()->part()->encoding();
1349 /* Check type before language, since language is deprecated */
1350 if ((a = currToken.attrs->getAttributeItem(ATTR_TYPE)) != 0 && !a->value().string().isEmpty())
1351 foundTypeAttribute = true;
1353 a = currToken.attrs->getAttributeItem(ATTR_LANGUAGE);
1357 if( foundTypeAttribute ) {
1359 Mozilla 1.5 accepts application/x-javascript, and some web references claim it is the only
1360 correct variation, but WinIE 6 doesn't accept it.
1361 Neither Mozilla 1.5 nor WinIE 6 accept application/javascript, application/ecmascript, or
1362 application/x-ecmascript.
1363 Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does.
1364 Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does.
1365 Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't.
1366 Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string.
1367 We want to accept all the values that either of these browsers accept, but not other values.
1369 QString type = a->value().string().stripWhiteSpace().lower();
1370 if( type.compare("application/x-javascript") != 0 &&
1371 type.compare("text/javascript") != 0 &&
1372 type.compare("text/javascript1.0") != 0 &&
1373 type.compare("text/javascript1.1") != 0 &&
1374 type.compare("text/javascript1.2") != 0 &&
1375 type.compare("text/javascript1.3") != 0 &&
1376 type.compare("text/javascript1.4") != 0 &&
1377 type.compare("text/javascript1.5") != 0 &&
1378 type.compare("text/jscript") != 0 &&
1379 type.compare("text/ecmascript") != 0 &&
1380 type.compare("text/livescript") )
1384 Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does.
1385 Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3.
1386 Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace.
1387 We want to accept all the values that either of these browsers accept, but not other values.
1389 QString lang = a->value().string();
1390 lang = lang.lower();
1391 if( lang.compare("") != 0 &&
1392 lang.compare("javascript") != 0 &&
1393 lang.compare("javascript1.0") != 0 &&
1394 lang.compare("javascript1.1") != 0 &&
1395 lang.compare("javascript1.2") != 0 &&
1396 lang.compare("javascript1.3") != 0 &&
1397 lang.compare("javascript1.4") != 0 &&
1398 lang.compare("javascript1.5") != 0 &&
1399 lang.compare("ecmascript") != 0 &&
1400 lang.compare("livescript") != 0 &&
1401 lang.compare("jscript") )
1408 // we have to take care to close the pre block in
1409 // case we encounter an unallowed element....
1410 if(pre && beginTag && !DOM::checkChild(ID_PRE, tagID)) {
1411 kdDebug(6036) << " not allowed in <pre> " << (int)tagID << endl;
1422 searchStopper = scriptEnd;
1423 searchStopperLen = 8;
1427 else if (tagID < ID_CLOSE_TAG) // Handle <script src="foo"/>
1432 searchStopper = styleEnd;
1433 searchStopperLen = 7;
1440 searchStopper = textareaEnd;
1441 searchStopperLen = 10;
1448 searchStopper = titleEnd;
1449 searchStopperLen = 7;
1456 searchStopper = xmpEnd;
1457 searchStopperLen = 5;
1466 plaintext = beginTag;
1470 if (beginTag && endTag[tagID] == FORBIDDEN)
1471 // Don't discard LFs since this element has no end tag.
1472 discard = NoneDiscard;
1474 return; // Finished parsing tag!
// Flushes the whitespace character deferred in `pending` to the output buffer
// at `dest`, then clears `pending`.
1481 void HTMLTokenizer::addPending()
1483 if ( select && !script )
1487 else if ( textarea || script )
// In textarea/script content the pending character is written verbatim and
// prePos tracks the output column: a line feed resets it to 0, a space advances
// it by one, and a tab advances it to the next multiple of TAB_SIZE.
1490 case LFPending: *dest++ = '\n'; prePos = 0; break;
1491 case SpacePending: *dest++ = ' '; ++prePos; break;
1492 case TabPending: *dest++ = '\t'; prePos += TAB_SIZE - (prePos % TAB_SIZE); break;
1504 // Insert a breaking space
1505 *dest++ = QChar(' ');
// Distance from the current column to the next tab stop; that many spaces are
// written below in place of the tab character.
1516 p = TAB_SIZE - ( prePos % TAB_SIZE );
1518 qDebug("tab pending, prePos: %d, toadd: %d", prePos, p);
1521 for ( int x = 0; x < p; x++ )
1522 *dest++ = QChar(' ');
// Nothing is deferred any more.
1532 pending = NonePending;
// Feeds a chunk of source text into the tokenizer.
//   str        - the new chunk of source text.
//   appendData - when set while a script is executing, the chunk is queued
//                instead of being parsed right away.
// Parsing is also postponed while external scripts are still queued in
// cachedScript; postponed text goes to currentPrependingSrc if set, otherwise
// to pendingSrc.
1535 void HTMLTokenizer::write(const TokenizerString &str, bool appendData)
1538 kdDebug( 6036 ) << this << " Tokenizer::write(\"" << str << "\"," << appendData << ")" << endl;
1544 if ( ( m_executingScript && appendData ) || !cachedScript.isEmpty() ) {
1545 // don't parse; we will do this later
1546 if (currentPrependingSrc) {
1547 currentPrependingSrc->append(str);
1549 pendingSrc.append(str);
1564 // Once a timer is set, it has control of when the tokenizer continues.
1572 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1573 if (!parser->doc()->ownerElement())
1574 printf("Beginning write at time %d\n", parser->doc()->elapsedTime());
1578 // parseEntity(src, dest);
1580 int processedCount = 0;
1583 KWQUIEventTime eventTime;
// Main loop: runs while input remains and the part (if there is one) has no
// scheduled location change pending.
1585 while (!src.isEmpty() && (!parser->doc()->part() || !parser->doc()->part()->isScheduledLocationChangePending())) {
// continueProcessing() decides whether this pass may go on; it can start a
// timer so that processing resumes later from timerEvent().
1586 if (!continueProcessing(processedCount, startTime, eventTime))
1589 // do we need to enlarge the buffer?
1592 ushort cc = src->unicode();
1594 if (skipLF && (cc != '\n'))
1602 parseEntity( src, dest );
1603 else if ( plaintext )
1619 else if (processingInstruction)
1620 parseProcessingInstruction(src);
1623 else if ( startTag )
1633 searchCount = 1; // Look for '<!--' sequence to start comment
1639 // xml processing instruction
1640 processingInstruction = true;
1642 parseProcessingInstruction(src);
1648 if (!brokenServer) {
1649 // <% server stuff, handle as comment %>
1655 // else fall through
1658 if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z')))
1660 // Start of a Start-Tag
1676 // pre context always gets its spaces/linefeeds
1677 if ( pre || script || (!parser->selectMode() &&
1678 (!parser->noSpaces() || dest > buffer ))) {
1680 discard = AllDiscard; // So we discard the first LF after the open tag.
1684 pending = NonePending;
1687 if (cc == '/' && discard == AllDiscard)
1688 discard = NoneDiscard; // A close tag. No need to discard LF.
1696 else if ( cc == '&' && !src.escaped())
1701 parseEntity(src, dest, true);
1703 else if ( cc == '<' && !src.escaped())
// Remember the line on which this tag starts; processToken() passes it to the
// JS proxy as the event handler line number.
1705 tagStartLineno = lineno+src.lineCount();
// Line breaks: depending on `discard` a line feed is dropped, otherwise it is
// remembered as LFPending for addPending() to emit.
1709 else if (( cc == '\n' ) || ( cc == '\r' ))
1711 if (select && !script)
1713 if (discard == LFDiscard)
1716 discard = NoneDiscard; // We have discarded 1 LF
1718 else if(discard == AllDiscard)
1724 if (pending == NonePending)
1725 pending = LFPending;
1729 if (discard == LFDiscard || discard == AllDiscard)
1732 discard = NoneDiscard; // We have discarded 1 LF
1739 pending = LFPending;
1743 /* Check for MS-DOS CRLF sequence */
// Spaces and tabs: `discard` is updated and the character is remembered as
// SpacePending or TabPending rather than written immediately.
1750 else if (( cc == ' ' ) || ( cc == '\t' ))
1752 if (select && !script) {
1753 if(discard == SpaceDiscard)
1754 discard = NoneDiscard;
1755 else if(discard == AllDiscard)
1758 pending = SpacePending;
1762 if (discard == AllDiscard)
1763 discard = NoneDiscard;
1768 pending = SpacePending;
1770 pending = TabPending;
1780 discard = NoneDiscard;
// Only when building against Qt older than 3.0: flag the token as complex text
// when the character's Unicode row lies between 0x05 and 0x10 (exclusive) or
// above 0xfd.
1785 #if QT_VERSION < 300
1786 unsigned char row = src->row();
1787 if ( row > 0x05 && row < 0x10 || row > 0xfd )
1788 currToken.complexText = true;
1797 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1798 if (!parser->doc()->ownerElement())
1799 printf("Ending write at time %d\n", parser->doc()->elapsedTime());
// No more data will arrive and nothing is outstanding (no external script
// loading, no script executing, no resume timer): finish up.
1806 if (noMoreData && !loadingExtScript && !m_executingScript && !timerId)
1807 end(); // this actually causes us to be deleted
1810 void HTMLTokenizer::stopped()
// Returns true while a resume timer is outstanding, i.e. timerId is non-zero
// (continueProcessing() and timerEvent() set it via startTimer()).
1818 bool HTMLTokenizer::processingData() const
1820 return timerId != 0;
// Decides whether write() may keep tokenizing in the current pass.
//   processedCount - number of characters handled so far in this pass.
//   startTime      - when the pass started; compared against TOKENIZER_TIME_DELAY.
//   eventTime      - only consulted for the instrumentation output below.
// The elapsed time is only examined when no external script is loading, no
// script is executing, and either more than TOKENIZER_CHUNK_SIZE characters
// have been processed or a yield was requested through allowYield. Once the
// time budget is exceeded a zero-interval timer is started so that
// timerEvent() resumes the work.
// NOTE(review): presumably returns false in that case, since write() tests the
// result with `if (!continueProcessing(...))` - confirm.
1823 bool HTMLTokenizer::continueProcessing(int& processedCount, const QTime& startTime, const KWQUIEventTime& eventTime)
1825 // We don't want to be checking elapsed time with every character, so we only check after we've
1826 // processed a certain number of characters.
1827 bool allowedYield = allowYield;
1829 if (!loadingExtScript && !m_executingScript && (processedCount > TOKENIZER_CHUNK_SIZE || allowedYield)) {
1831 if (startTime.elapsed() > TOKENIZER_TIME_DELAY) {
1832 /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
1833 load, but this hurts overall performance on slower machines. For now turn this
1835 || (!parser->doc()->haveStylesheetsLoaded() &&
1836 (parser->doc()->documentElement()->id() != ID_HTML || parser->doc()->body()))) {*/
1837 // Schedule the timer to keep processing as soon as possible.
1839 timerId = startTimer(0);
1840 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1841 if (eventTime.uiEventPending())
1842 printf("Deferring processing of data because of UI event.\n");
1843 else if (startTime.elapsed() > TOKENIZER_TIME_DELAY)
// NOTE(review): the message below hard-codes "200ms", but the threshold that is
// actually tested is TOKENIZER_TIME_DELAY.
1844 printf("Deferring processing of data because 200ms elapsed away from event loop.\n");
// Timer callback that resumes tokenizing deferred by continueProcessing().
// Only reacts to the tokenizer's own timer (timerId). If the view has a layout
// pending and the document reports no minimum layout delay, the timer is simply
// restarted so that layout runs first; otherwise write() is called with an
// empty string to continue with the input that is still buffered.
1854 void HTMLTokenizer::timerEvent(QTimerEvent* e)
1856 if (e->timerId() == timerId) {
1861 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
1862 if (!parser->doc()->ownerElement())
1863 printf("Beginning timer write at time %d\n", parser->doc()->elapsedTime());
1866 if (parser->doc()->view() && parser->doc()->view()->layoutPending() && !parser->doc()->minimumLayoutDelay()) {
1867 // Restart the timer and let layout win. This is basically a way of ensuring that the layout
1868 // timer has higher priority than our timer.
1869 timerId = startTimer(0);
1873 // Invoke write() as though more data came in.
// noMoreData is saved, cleared for the duration of the call, and restored
// afterwards, because write() calls end() when it sees noMoreData set.
1874 bool oldNoMoreData = noMoreData;
1875 noMoreData = false; // This prevents write() from deleting the tokenizer.
1876 write(TokenizerString(), true);
1877 noMoreData = oldNoMoreData;
1879 // If the timer dies (and stays dead after the write), we need to let WebKit know that we're done processing the data.
// Tells the owning KHTMLPart, via tokenizerProcessedData(), that all data has
// been processed. This only happens when no more data is expected and nothing
// is outstanding: no external script loading, no script executing, not on
// hold, and no resume timer.
1884 void HTMLTokenizer::allDataProcessed()
1886 if (noMoreData && !loadingExtScript && !m_executingScript && !onHold && !timerId) {
// Guard against a missing parser, document or part before dereferencing them.
1887 if (!parser || !parser->doc() || !parser->doc()->part())
1889 KHTMLPart* part = parser->doc()->part();
1891 part->tokenizerProcessedData();
// Finishes tokenizing: releases the token buffer and the script-code buffer,
// zeroes the script-code bookkeeping, and emits finishedParsing().
// Must not be called while a resume timer is outstanding (asserted below).
// Callers note that this call causes the tokenizer to be deleted.
1895 void HTMLTokenizer::end()
1897 assert(timerId == 0);
// Without a token buffer, finishedParsing() is emitted directly.
1904 if ( buffer == 0 ) {
1906 emit finishedParsing();
1910 // parseTag is using the buffer for different matters
1915 KHTML_DELETE_QCHAR_VEC(buffer);
1918 KHTML_DELETE_QCHAR_VEC(scriptCode);
1921 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
1924 emit finishedParsing();
// Called when no further data will arrive. First recovers from comment or
// server blocks that were never closed: the text accumulated in scriptCode is
// moved into `food`, the script-code buffer is released, and the comment/server
// state is cleared. Afterwards end() is called unless something is still
// outstanding.
1927 void HTMLTokenizer::finish()
1929 // do this as long as we don't find matching comment ends
1930 while((comment || server) && scriptCode && scriptCodeSize)
1932 // we've found an unmatched comment start
1934 brokenComments = true;
1936 brokenServer = true;
// Terminate the accumulated text with two zero characters.
// NOTE(review): checkScriptBuffer() presumably guarantees room for them - confirm.
1937 checkScriptBuffer();
1938 scriptCode[ scriptCodeSize ] = 0;
1939 scriptCode[ scriptCodeSize + 1 ] = 0;
1942 if (script || style) {
1943 food.setUnicode(scriptCode, scriptCodeSize);
1947 food += QString(scriptCode, scriptCodeSize);
// Locate the first '>' in the accumulated text; only what follows it is kept.
// NOTE(review): QString::find() yields -1 when there is no '>', which makes
// pos+1 equal to 0 so the whole buffer is kept - confirm this is intended.
1950 pos = QConstString(scriptCode, scriptCodeSize).string().find('>');
1951 food.setUnicode(scriptCode+pos+1, scriptCodeSize-pos-1); // deep copy
1953 KHTML_DELETE_QCHAR_VEC(scriptCode);
1955 scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
1956 comment = server = false;
1957 if ( !food.isEmpty() )
1960 // this indicates we will not receive any more data... but if we are waiting on
1961 // an external script to load, we can't finish parsing until that is done
1963 if (!loadingExtScript && !m_executingScript && !onHold && !timerId)
1964 end(); // this actually causes us to be deleted
// Hands the current token (currToken) to the parser.
// Characters buffered between `buffer` and `dest` become the token's text, and
// the token is typed ID_TEXT unless it is a comment. The JS proxy, if there is
// one, is given the line number to attribute event handlers to for the duration
// of the call; it is reset to 0 afterwards.
1967 void HTMLTokenizer::processToken()
// The proxy is only available when there is a view with a part; otherwise null.
1969 KJSProxy *jsProxy = (view && view->part()) ? view->part()->jScript() : 0L;
1971 jsProxy->setEventHandlerLineno(tagStartLineno);
1972 if ( dest > buffer )
1976 qDebug( "unexpected token id: %d, str: *%s*", currToken.id,QConstString( buffer,dest-buffer ).string().latin1() );
// Wrap the buffered characters in a DOMStringImpl and take a reference on it.
1981 currToken.text = new DOMStringImpl( buffer, dest - buffer );
1982 currToken.text->ref();
1983 if (currToken.id != ID_COMMENT)
1984 currToken.id = ID_TEXT;
1986 else if(!currToken.id) {
1989 jsProxy->setEventHandlerLineno(lineno+src.lineCount());
// Debug dump of the token: name, id, text and attributes.
1996 QString name = getTagName(currToken.id).string();
1999 text = QConstString(currToken.text->s, currToken.text->l).string();
2001 kdDebug( 6036 ) << "Token --> " << name << " id = " << currToken.id << endl;
2003 kdDebug( 6036 ) << "Token is FLAT!" << endl;
2005 kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;
2006 unsigned long l = currToken.attrs ? currToken.attrs->length() : 0;
2008 kdDebug( 6036 ) << "Attributes: " << l << endl;
2009 for (unsigned long i = 0; i < l; ++i) {
2010 AttributeImpl* c = currToken.attrs->attributeItem(i);
2011 kdDebug( 6036 ) << " " << c->id() << " " << parser->doc()->getDocument()->attrName(c->id()).string()
2012 << "=\"" << c->value().string() << "\"" << endl;
2015 kdDebug( 6036 ) << endl;
2017 // pass the token over to the parser, the parser DOES NOT delete the token
2018 parser->parseToken(&currToken);
2022 jsProxy->setEventHandlerLineno(0);
2025 HTMLTokenizer::~HTMLTokenizer()
// Grows the token buffer so that it can hold at least `len` more characters.
// The new capacity is the larger of twice the current size and size + len.
//   len - number of additional QChars that must fit.
2033 void HTMLTokenizer::enlargeBuffer(int len)
2035 int newsize = kMax(size*2, size+len);
// realloc may move the storage, so remember the write position as an offset
// and re-base `dest` onto the new block afterwards.
2036 int oldoffs = (dest - buffer);
// NOTE(review): the realloc result is not checked; on allocation failure
// `buffer` would become null and the old block would be lost.
2038 buffer = (QChar*)realloc(buffer, newsize*sizeof(QChar));
2039 dest = buffer + oldoffs;
// Grows the script-code buffer so that it can hold at least `len` more
// characters. The new capacity is the larger of twice the current capacity and
// capacity + len, and is recorded in scriptCodeMaxSize.
//   len - number of additional QChars that must fit.
2043 void HTMLTokenizer::enlargeScriptBuffer(int len)
2045 int newsize = kMax(scriptCodeMaxSize*2, scriptCodeMaxSize+len);
// NOTE(review): the realloc result is not checked; on allocation failure
// `scriptCode` would become null and the old block would be lost.
2046 scriptCode = (QChar*)realloc(scriptCode, newsize*sizeof(QChar));
2047 scriptCodeMaxSize = newsize;
// CachedObject callback: an external script has finished loading.
// Executes, in queue order, every script at the head of cachedScript that is
// already loaded. The loop ends when the queue is empty (re-checked after each
// execution) or when its head is still loading. The finished object itself is
// not inspected; the parameter is unnamed.
2050 void HTMLTokenizer::notifyFinished(CachedObject */*finishedObj*/)
2052 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
2053 if (!parser->doc()->ownerElement())
2054 printf("script loaded at %d\n", parser->doc()->elapsedTime());
// At least one external script must be queued when this is called.
2057 assert(!cachedScript.isEmpty());
2058 bool finished = false;
2059 while (!finished && cachedScript.head()->isLoaded()) {
2061 kdDebug( 6036 ) << "Finished loading an external script" << endl;
// Take the script off the queue and fetch its source text.
2063 CachedScript* cs = cachedScript.dequeue();
2064 DOMString scriptSource = cs->script();
2066 kdDebug( 6036 ) << "External script is:" << endl << scriptSource.string() << endl;
// Replace the current input with an empty source before running the script.
2068 setSrc(TokenizerString());
2070 // make sure we forget about the script before we execute the new one
2071 // infinite recursion might happen otherwise
2072 QString cachedScriptUrl( cs->url().string() );
2075 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
2076 if (!parser->doc()->ownerElement())
2077 printf("external script beginning execution at %d\n", parser->doc()->elapsedTime());
2080 scriptExecution( scriptSource.string(), cachedScriptUrl );
2082 // The state of cachedScript.isEmpty() can change inside the scriptExecution()
2083 // call above, so test afterwards.
2084 finished = cachedScript.isEmpty();
2086 loadingExtScript = false;
2087 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
2088 if (!parser->doc()->ownerElement())
2089 printf("external script finished execution at %d\n", parser->doc()->elapsedTime());
2093 // 'script' is true when we are called synchronously from
2094 // parseScript(). In that case parseScript() will take care
2095 // of 'scriptOutput'.
// Local copy of the text that write() queued in pendingSrc while scripts were
// outstanding.
// NOTE(review): presumably handed back to write() afterwards - confirm.
2097 TokenizerString rest = pendingSrc;
2100 // we might be deleted at this point, do not
2101 // access any members.
// Reports whether an external script is still being loaded (loadingExtScript).
2106 bool HTMLTokenizer::isWaitingForScripts() const
2108 return loadingExtScript;
// Switches the tokenizer's input to `source`.
// The lines already counted on the current source are folded into `lineno`
// first, so the running line number carries over, and the source's own line
// counter is then reset.
// NOTE(review): presumably `src` is assigned from `source` between these two
// statements - confirm.
2111 void HTMLTokenizer::setSrc(const TokenizerString &source)
2113 lineno += src.lineCount();
2115 src.resetLineCount();
2118 void HTMLTokenizer::setOnHold(bool _onHold)