From a560e964180cebc0b4083c9f8b30ab17bfcaf1d4 Mon Sep 17 00:00:00 2001 From: "darin@apple.com" Date: Mon, 12 Nov 2007 04:09:30 +0000 Subject: [PATCH] Reviewed by Mitz. - http://bugs.webkit.org/show_bug.cgi?id=15944 streamline SegmentedString to speed up parsing I measured a speed-up of the page load test while developing this patch. I don't have a precise figure, though. * html/HTMLTokenizer.h: Removed unneeded lineNumberPtr() function. Also renamed lineno to m_lineNumber. * html/HTMLTokenizer.cpp: (WebCore::HTMLTokenizer::processListing): Don't pass 0 to the advance function since we don't want to update a line number. (WebCore::HTMLTokenizer::parseSpecial): Ditto. (WebCore::HTMLTokenizer::parseComment): Pass the line number data member directly instead of lineNumberPtr() since the advance function now takes a reference. (WebCore::HTMLTokenizer::parseServer): Ditto. (WebCore::HTMLTokenizer::parseProcessingInstruction): Ditto. (WebCore::HTMLTokenizer::parseText): Ditto. (WebCore::HTMLTokenizer::parseEntity): Ditto. (WebCore::HTMLTokenizer::parseTag): Ditto. (WebCore::HTMLTokenizer::write): Ditto. * loader/FTPDirectoryDocument.cpp: (WebCore::FTPDirectoryTokenizer::write): * loader/TextDocument.cpp: (WebCore::TextTokenizer::write): Don't pass 0 to the advance function. * platform/SegmentedString.h: (WebCore::SegmentedString::advance): Streamlined the most common case, and pushed less common cases into a separate function that is not inlined. Also got rid of a branch by separating the case with a line number from the case without one. * platform/SegmentedString.cpp: (WebCore::SegmentedString::advanceSlowCase): Added. The aforementioned less common cases are here. git-svn-id: https://svn.webkit.org/repository/webkit/trunk@27699 268f45cc-cd09-0410-ab3c-d52691b4dbfc --- WebCore/ChangeLog | 37 +++++++++ WebCore/html/HTMLTokenizer.cpp | 103 ++++++++++++------------ WebCore/html/HTMLTokenizer.h | 8 +- WebCore/loader/FTPDirectoryDocument.cpp | 2 +- WebCore/loader/TextDocument.cpp | 5 +- WebCore/platform/SegmentedString.cpp | 31 ++++++- WebCore/platform/SegmentedString.h | 37 +++++---- 7 files changed, 144 insertions(+), 79 deletions(-) diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog index 73be6c41a2bc..65c2a6841247 100644 --- a/WebCore/ChangeLog +++ b/WebCore/ChangeLog @@ -1,3 +1,40 @@ +2007-11-11 Darin Adler + + Reviewed by Mitz. + + - http://bugs.webkit.org/show_bug.cgi?id=15944 + streamline SegmentedString to speed up parsing + + I measured a speed-up of the page load test while developing this patch. I don't + have a precise figure, though. + + * html/HTMLTokenizer.h: Removed unneeded lineNumberPtr() function. Also renamed + lineno to m_lineNumber. + * html/HTMLTokenizer.cpp: + (WebCore::HTMLTokenizer::processListing): Don't pass 0 to the advance function + since we don't want to update a line number. + (WebCore::HTMLTokenizer::parseSpecial): Ditto. + (WebCore::HTMLTokenizer::parseComment): Pass the line number data member directly + instead of lineNumberPtr() since the advance function now takes a reference. + (WebCore::HTMLTokenizer::parseServer): Ditto. + (WebCore::HTMLTokenizer::parseProcessingInstruction): Ditto. + (WebCore::HTMLTokenizer::parseText): Ditto. + (WebCore::HTMLTokenizer::parseEntity): Ditto. + (WebCore::HTMLTokenizer::parseTag): Ditto. + (WebCore::HTMLTokenizer::write): Ditto. + + * loader/FTPDirectoryDocument.cpp: (WebCore::FTPDirectoryTokenizer::write): + * loader/TextDocument.cpp: (WebCore::TextTokenizer::write): + Don't pass 0 to the advance function. + + * platform/SegmentedString.h: (WebCore::SegmentedString::advance): Streamlined + the most common case, and pushed less common cases into a separate function + that is not inlined. Also got rid of a branch by separating the case with a + line number from the case without one. + + * platform/SegmentedString.cpp: (WebCore::SegmentedString::advanceSlowCase): + Added. The aforementioned less common cases are here. + 2007-11-11 Antti Koivisto Forgot to do this review change (and test HTTP commit). diff --git a/WebCore/html/HTMLTokenizer.cpp b/WebCore/html/HTMLTokenizer.cpp index a9c8160c474d..3c7f8a4f6295 100644 --- a/WebCore/html/HTMLTokenizer.cpp +++ b/WebCore/html/HTMLTokenizer.cpp @@ -242,7 +242,7 @@ void HTMLTokenizer::begin() noMoreData = false; brokenComments = false; brokenServer = false; - lineno = 0; + m_lineNumber = 0; scriptStartLineno = 0; tagStartLineno = 0; m_state.setForceSynchronous(false); @@ -261,7 +261,7 @@ HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State s if (state.skipLF()) { state.setSkipLF(false); if (*list == '\n') { - list.advance(0); + list.advance(); continue; } } @@ -279,11 +279,11 @@ HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State s if (*list == '\r') state.setSkipLF(true); - list.advance(0); + list.advance(); } else { state.setDiscardLF(false); *dest++ = *list; - list.advance(0); + list.advance(); } } @@ -296,7 +296,7 @@ HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State sta ASSERT(!state.hasTagState()); ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() == 1 ); if (state.inScript()) - scriptStartLineno = lineno; + scriptStartLineno = m_lineNumber; if (state.inComment()) state = parseComment(src, state); @@ -311,7 +311,7 @@ HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State sta continue; } if (scriptCodeResync && !tquote && ch == '>') { - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); scriptCodeSize = scriptCodeResync-1; scriptCodeResync = 0; scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0; @@ -363,12 +363,12 @@ HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State sta state.setEscaped(!state.escaped() && ch == '\\'); if (!scriptCodeResync && (state.inTextArea() || state.inTitle()) && !src.escaped() && ch == '&') { UChar* scriptCodeDest = scriptCode+scriptCodeSize; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false); scriptCodeSize = scriptCodeDest-scriptCode; } else { scriptCode[scriptCodeSize++] = *src; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); } } @@ -578,7 +578,7 @@ HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString &src, State sta endCharsCount = 4; } if (handleBrokenComments || endCharsCount > 1) { - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle())) { checkScriptBuffer(); scriptCode[scriptCodeSize] = 0; @@ -596,7 +596,7 @@ HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString &src, State sta return state; // Finished parsing comment } } - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); } return state; @@ -609,12 +609,12 @@ HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State stat scriptCode[scriptCodeSize++] = *src; if (*src == '>' && scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') { - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); state.setInServer(false); scriptCodeSize = 0; return state; // Finished parsing server include } - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); } return state; } @@ -634,11 +634,11 @@ HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString & else if (chbegin == '>' && (!tquote || oldchar == '?')) { // We got a '?>' sequence state.setInProcessingInstruction(false); - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); state.setDiscardLF(true); return state; // Finished parsing comment! } - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); oldchar = chbegin; } @@ -653,7 +653,7 @@ HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString &src, State state) if (state.skipLF()) { state.setSkipLF(false); if (cc == '\n') { - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); continue; } } @@ -666,7 +666,7 @@ HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString &src, State state) *dest++ = '\n'; } else *dest++ = cc; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); } return state; @@ -693,7 +693,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de case SearchEntity: if(cc == '#') { cBuffer[cBufferPos++] = cc; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); state.setEntityState(NumericSearch); } else @@ -704,7 +704,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de case NumericSearch: if (cc == 'x' || cc == 'X') { cBuffer[cBufferPos++] = cc; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); state.setEntityState(Hexadecimal); } else if (cc >= '0' && cc <= '9') state.setEntityState(Decimal); @@ -727,7 +727,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch EntityUnicodeValue = EntityUnicodeValue * 16 + digit; cBuffer[cBufferPos++] = cc; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); } if (cBufferPos == 10) state.setEntityState(SearchSemicolon); @@ -746,7 +746,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0'); cBuffer[cBufferPos++] = cc; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); } if (cBufferPos == 9) state.setEntityState(SearchSemicolon); @@ -764,7 +764,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de } cBuffer[cBufferPos++] = cc; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); } if (cBufferPos == 9) state.setEntityState(SearchSemicolon); @@ -787,7 +787,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) { if (!inViewSourceMode()) { if (*src == ';') - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); if (EntityUnicodeValue <= 0xFFFF) { checkBuffer(); src.push(fixUpChar(EntityUnicodeValue)); @@ -806,7 +806,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de dest += cBufferPos; if (*src == ';') { *dest++ = ';'; - src.advance(lineNumberPtr()); + src.advance(m_lineNumber); } } } else { @@ -832,7 +832,6 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state) unsigned cBufferPos = m_cBufferPos; - int* lineNoPtr = lineNumberPtr(); bool lastIsSlash = false; while (!src.isEmpty()) { @@ -859,7 +858,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state) kdDebug( 6036 ) << "Found comment" << endl; #endif // Found '