+2007-11-18 Alexey Proskuryakov <ap@webkit.org>
+
+ Reviewed by Maciej.
+
+ <rdar://problem/5546393> Whitespace handling doesn't match HTML5.
+
+ The HTML5 definition of whitespace is the same as MSIE's, except that MSIE also strips null
+ characters. Firefox additionally treats U+0008 as whitespace, but does not treat U+000B or U+000C as whitespace.
+
+ Test: fast/parser/html-whitespace.html
+
+ * html/HTMLTokenizer.cpp:
+ (WebCore::HTMLTokenizer::parseSpecial):
+ (WebCore::HTMLTokenizer::parseTag):
+ Use isASCIISpace, which matches the HTML5 definition of whitespace and is also what we use to
+ check for whitespace almost everywhere.
+
2007-11-17 Mark Rowe <mrowe@apple.com>
Reviewed by Darin Adler.
return state;
}
// possible end of tagname, lets check.
- if (!scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || ch <= ' ') && ch &&
+ if (!scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
scriptCodeSize >= searchStopperLen &&
tagMatch( searchStopper, scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen )) {
scriptCodeResync = scriptCodeSize-searchStopperLen+1;
unsigned int ll = min(src.length(), CBUFLEN - cBufferPos);
while (ll--) {
UChar curchar = *src;
- if (curchar <= ' ' || curchar == '>' || curchar == '<') {
+ if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
finish = true;
break;
}
while(!src.isEmpty()) {
UChar curchar = *src;
// In this mode just ignore any quotes we encounter and treat them like spaces.
- if (curchar > ' ' && curchar != '\'' && curchar != '"') {
+ if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
if (curchar == '<' || curchar == '>')
state.setTagState(SearchEnd);
else
UChar curchar = *src;
// If we encounter a "/" when scanning an attribute name, treat it as a delimiter. This allows the
// cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
- if (curchar <= '>' && (curchar >= '<' || curchar <= ' ' || curchar == '/')) {
+ if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) {
cBuffer[cBufferPos] = '\0';
attrName = AtomicString(cBuffer);
dest = buffer;
}
// In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
- if (curchar > ' ' && curchar != '\'' && curchar != '"' && curchar != '/') {
+ if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
if(curchar == '=') {
#ifdef TOKEN_DEBUG
kdDebug(6036) << "found equal" << endl;
}
break;
case SearchValue:
- while(!src.isEmpty()) {
+ while (!src.isEmpty()) {
UChar curchar = *src;
- if(curchar > ' ') {
- if(( curchar == '\'' || curchar == '\"' )) {
+ if (!isASCIISpace(curchar)) {
+ if (curchar == '\'' || curchar == '\"') {
tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
state.setTagState(QuotedValue);
if (inViewSourceMode())
}
// no quotes. Every space means end of value
// '/' does not delimit in IE!
- if ( curchar <= ' ' || curchar == '>' )
- {
+ if (isASCIISpace(curchar) || curchar == '>') {
AtomicString v(buffer+1, dest-buffer-1);
currToken.addAttribute(m_doc, attrName, v, inViewSourceMode());
if (inViewSourceMode())