URLParser should ignore tabs at all possible locations
authorachristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 29 Sep 2016 18:18:04 +0000 (18:18 +0000)
committerachristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 29 Sep 2016 18:18:04 +0000 (18:18 +0000)
https://bugs.webkit.org/show_bug.cgi?id=162711

Reviewed by Tim Horton.

Source/WebCore:

The URL spec says to remove all tabs and newlines before parsing a URL.
To reduce passes on the URL and copies of data, I chose to just ignore them every time I increment the iterator.
This is fragile, but faster.  It can be completely tested, though.  That is what this patch does.

Covered by an addition to the API tests that tries inserting one tab at each location of each test.

* platform/URLParser.cpp:
(WebCore::URLParser::advance):
(WebCore::URLParser::isWindowsDriveLetter):
(WebCore::URLParser::appendWindowsDriveLetter):
(WebCore::URLParser::isPercentEncodedDot):
(WebCore::URLParser::isSingleDotPathSegment):
(WebCore::URLParser::isDoubleDotPathSegment):
(WebCore::URLParser::consumeSingleDotPathSegment):
(WebCore::URLParser::consumeDoubleDotPathSegment):
(WebCore::URLParser::checkLocalhostCodePoint):
(WebCore::URLParser::isAtLocalhost):
(WebCore::URLParser::isLocalhost):
(WebCore::URLParser::URLParser):
(WebCore::URLParser::parse):
(WebCore::isPercentEncodedDot): Deleted.
(WebCore::isSingleDotPathSegment): Deleted.
(WebCore::isDoubleDotPathSegment): Deleted.
(WebCore::consumeSingleDotPathSegment): Deleted.
(WebCore::consumeDoubleDotPathSegment): Deleted.
* platform/URLParser.h:
(WebCore::URLParser::advance):

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::ExpectedParts::isInvalid):
(TestWebKitAPI::checkURL):
(TestWebKitAPI::TEST_F):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@206592 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/platform/URLParser.cpp
Source/WebCore/platform/URLParser.h
Tools/ChangeLog
Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp

index 16e28cb..a557c02 100644 (file)
@@ -1,3 +1,38 @@
+2016-09-29  Alex Christensen  <achristensen@webkit.org>
+
+        URLParser should ignore tabs at all possible locations
+        https://bugs.webkit.org/show_bug.cgi?id=162711
+
+        Reviewed by Tim Horton.
+
+        The URL spec says to remove all tabs and newlines before parsing a URL.
+        To reduce passes on the URL and copies of data, I chose to just ignore them every time I increment the iterator.
+        This is fragile, but faster.  It can be completely tested, though.  That is what this patch does.
+
+        Covered by an addition to the API tests that tries inserting one tab at each location of each test.
+
+        * platform/URLParser.cpp:
+        (WebCore::URLParser::advance):
+        (WebCore::URLParser::isWindowsDriveLetter):
+        (WebCore::URLParser::appendWindowsDriveLetter):
+        (WebCore::URLParser::isPercentEncodedDot):
+        (WebCore::URLParser::isSingleDotPathSegment):
+        (WebCore::URLParser::isDoubleDotPathSegment):
+        (WebCore::URLParser::consumeSingleDotPathSegment):
+        (WebCore::URLParser::consumeDoubleDotPathSegment):
+        (WebCore::URLParser::checkLocalhostCodePoint):
+        (WebCore::URLParser::isAtLocalhost):
+        (WebCore::URLParser::isLocalhost):
+        (WebCore::URLParser::URLParser):
+        (WebCore::URLParser::parse):
+        (WebCore::isPercentEncodedDot): Deleted.
+        (WebCore::isSingleDotPathSegment): Deleted.
+        (WebCore::isDoubleDotPathSegment): Deleted.
+        (WebCore::consumeSingleDotPathSegment): Deleted.
+        (WebCore::consumeDoubleDotPathSegment): Deleted.
+        * platform/URLParser.h:
+        (WebCore::URLParser::advance):
+
 2016-09-29  Simon Fraser  <simon.fraser@apple.com>
 
         Fix hit testing on display:block <svg> elements
index b1b49bc..c05897e 100644 (file)
@@ -414,12 +414,13 @@ template<typename CharacterType> ALWAYS_INLINE static bool isSlashQuestionOrHash
 template<typename CharacterType> ALWAYS_INLINE static bool isValidSchemeCharacter(CharacterType character) { return character <= 'z' && characterClassTable[character] & Scheme; }
 static bool shouldPercentEncodeQueryByte(uint8_t byte) { return characterClassTable[byte] & QueryPercent; }
 
-template<typename CharacterType>
+template<typename CharacterType, URLParser::ReportSyntaxViolation reportSyntaxViolation>
 ALWAYS_INLINE void URLParser::advance(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)
 {
     ++iterator;
     while (UNLIKELY(!iterator.atEnd() && isTabOrNewline(*iterator))) {
-        syntaxViolation(iteratorForSyntaxViolationPosition);
+        if (reportSyntaxViolation == ReportSyntaxViolation::Yes)
+            syntaxViolation(iteratorForSyntaxViolationPosition);
         ++iterator;
     }
 }
@@ -429,15 +430,13 @@ ALWAYS_INLINE bool URLParser::isWindowsDriveLetter(CodePointIterator<CharacterTy
 {
     if (iterator.atEnd() || !isASCIIAlpha(*iterator))
         return false;
-    advance(iterator);
+    advance<CharacterType, ReportSyntaxViolation::No>(iterator);
     if (iterator.atEnd())
         return false;
     if (*iterator == ':')
         return true;
-    if (UNLIKELY(*iterator == '|')) {
-        syntaxViolation(iterator);
+    if (UNLIKELY(*iterator == '|'))
         return true;
-    }
     return false;
 }
 
@@ -464,6 +463,8 @@ void URLParser::appendWindowsDriveLetter(CodePointIterator<CharacterType>& itera
     advance(iterator);
     ASSERT(!iterator.atEnd());
     ASSERT(*iterator == ':' || *iterator == '|');
+    if (*iterator == '|')
+        syntaxViolation(iterator);
     appendToASCIIBuffer(':');
     advance(iterator);
 }
@@ -818,93 +819,93 @@ void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePoint
 static const char* dotASCIICode = "2e";
 
 template<typename CharacterType>
-ALWAYS_INLINE static bool isPercentEncodedDot(CodePointIterator<CharacterType> c)
+ALWAYS_INLINE bool URLParser::isPercentEncodedDot(CodePointIterator<CharacterType> c)
 {
     if (c.atEnd())
         return false;
     if (*c != '%')
         return false;
-    ++c;
+    advance<CharacterType, ReportSyntaxViolation::No>(c);
     if (c.atEnd())
         return false;
     if (*c != dotASCIICode[0])
         return false;
-    ++c;
+    advance<CharacterType, ReportSyntaxViolation::No>(c);
     if (c.atEnd())
         return false;
     return toASCIILower(*c) == dotASCIICode[1];
 }
 
 template<typename CharacterType>
-ALWAYS_INLINE static bool isSingleDotPathSegment(CodePointIterator<CharacterType> c)
+ALWAYS_INLINE bool URLParser::isSingleDotPathSegment(CodePointIterator<CharacterType> c)
 {
     if (c.atEnd())
         return false;
     if (*c == '.') {
-        ++c;
+        advance<CharacterType, ReportSyntaxViolation::No>(c);
         return c.atEnd() || isSlashQuestionOrHash(*c);
     }
     if (*c != '%')
         return false;
-    ++c;
+    advance<CharacterType, ReportSyntaxViolation::No>(c);
     if (c.atEnd() || *c != dotASCIICode[0])
         return false;
-    ++c;
+    advance<CharacterType, ReportSyntaxViolation::No>(c);
     if (c.atEnd())
         return false;
     if (toASCIILower(*c) == dotASCIICode[1]) {
-        ++c;
+        advance<CharacterType, ReportSyntaxViolation::No>(c);
         return c.atEnd() || isSlashQuestionOrHash(*c);
     }
     return false;
 }
 
 template<typename CharacterType>
-ALWAYS_INLINE static bool isDoubleDotPathSegment(CodePointIterator<CharacterType> c)
+ALWAYS_INLINE bool URLParser::isDoubleDotPathSegment(CodePointIterator<CharacterType> c)
 {
     if (c.atEnd())
         return false;
     if (*c == '.') {
-        ++c;
+        advance<CharacterType, ReportSyntaxViolation::No>(c);
         return isSingleDotPathSegment(c);
     }
     if (*c != '%')
         return false;
-    ++c;
+    advance<CharacterType, ReportSyntaxViolation::No>(c);
     if (c.atEnd() || *c != dotASCIICode[0])
         return false;
-    ++c;
+    advance<CharacterType, ReportSyntaxViolation::No>(c);
     if (c.atEnd())
         return false;
     if (toASCIILower(*c) == dotASCIICode[1]) {
-        ++c;
+        advance<CharacterType, ReportSyntaxViolation::No>(c);
         return isSingleDotPathSegment(c);
     }
     return false;
 }
 
 template<typename CharacterType>
-static void consumeSingleDotPathSegment(CodePointIterator<CharacterType>& c)
+void URLParser::consumeSingleDotPathSegment(CodePointIterator<CharacterType>& c)
 {
     ASSERT(isSingleDotPathSegment(c));
     if (*c == '.') {
-        ++c;
+        advance(c);
         if (!c.atEnd()) {
             if (*c == '/' || *c == '\\')
-                ++c;
+                advance(c);
             else
                 ASSERT(*c == '?' || *c == '#');
         }
     } else {
         ASSERT(*c == '%');
-        ++c;
+        advance(c);
         ASSERT(*c == dotASCIICode[0]);
-        ++c;
+        advance(c);
         ASSERT(toASCIILower(*c) == dotASCIICode[1]);
-        ++c;
+        advance(c);
         if (!c.atEnd()) {
             if (*c == '/' || *c == '\\')
-                ++c;
+                advance(c);
             else
                 ASSERT(*c == '?' || *c == '#');
         }
@@ -912,18 +913,18 @@ static void consumeSingleDotPathSegment(CodePointIterator<CharacterType>& c)
 }
 
 template<typename CharacterType>
-static void consumeDoubleDotPathSegment(CodePointIterator<CharacterType>& c)
+void URLParser::consumeDoubleDotPathSegment(CodePointIterator<CharacterType>& c)
 {
     ASSERT(isDoubleDotPathSegment(c));
     if (*c == '.')
-        ++c;
+        advance(c);
     else {
         ASSERT(*c == '%');
-        ++c;
+        advance(c);
         ASSERT(*c == dotASCIICode[0]);
-        ++c;
+        advance(c);
         ASSERT(toASCIILower(*c) == dotASCIICode[1]);
-        ++c;
+        advance(c);
     }
     consumeSingleDotPathSegment(c);
 }
@@ -991,6 +992,46 @@ void URLParser::failure()
     m_url.m_string = m_inputString;
 }
 
+template<typename CharacterType>
+bool URLParser::checkLocalhostCodePoint(CodePointIterator<CharacterType>& iterator, UChar32 codePoint)
+{
+    if (iterator.atEnd() || toASCIILower(*iterator) != codePoint)
+        return false;
+    advance<CharacterType, ReportSyntaxViolation::No>(iterator);
+    return true;
+}
+
+template<typename CharacterType>
+bool URLParser::isAtLocalhost(CodePointIterator<CharacterType> iterator)
+{
+    if (!checkLocalhostCodePoint(iterator, 'l'))
+        return false;
+    if (!checkLocalhostCodePoint(iterator, 'o'))
+        return false;
+    if (!checkLocalhostCodePoint(iterator, 'c'))
+        return false;
+    if (!checkLocalhostCodePoint(iterator, 'a'))
+        return false;
+    if (!checkLocalhostCodePoint(iterator, 'l'))
+        return false;
+    if (!checkLocalhostCodePoint(iterator, 'h'))
+        return false;
+    if (!checkLocalhostCodePoint(iterator, 'o'))
+        return false;
+    if (!checkLocalhostCodePoint(iterator, 's'))
+        return false;
+    if (!checkLocalhostCodePoint(iterator, 't'))
+        return false;
+    return iterator.atEnd();
+}
+
+bool URLParser::isLocalhost(StringView view)
+{
+    if (view.is8Bit())
+        return isAtLocalhost(CodePointIterator<LChar>(view.characters8(), view.characters8() + view.length()));
+    return isAtLocalhost(CodePointIterator<UChar>(view.characters16(), view.characters16() + view.length()));
+}
+
 ALWAYS_INLINE StringView URLParser::parsedDataView(size_t start, size_t length)
 {
     if (UNLIKELY(m_didSeeSyntaxViolation)) {
@@ -1028,9 +1069,20 @@ URLParser::URLParser(const String& input, const URL& base, const TextEncoding& e
         m_inputBegin = input.characters16();
         parse(input.characters16(), input.length(), base, encoding);
     }
+
     ASSERT(!m_url.m_isValid
         || m_didSeeSyntaxViolation == (m_url.string() != input)
         || (input.isEmpty() && m_url.m_string == base.m_string));
+    ASSERT(internalValuesConsistent(m_url));
+#if !ASSERT_DISABLED
+    if (!m_didSeeSyntaxViolation) {
+        // Force a syntax violation at the beginning to make sure we get the same result.
+        URLParser parser(makeString(" ", input), base, encoding);
+        URL parsed = parser.result();
+        if (parsed.isValid())
+            ASSERT(allValuesEqual(parser.result(), m_url));
+    }
+#endif
 }
 
 template<typename CharacterType>
@@ -1211,7 +1263,7 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U
             if (*c == '/') {
                 appendToASCIIBuffer('/');
                 state = State::AuthorityOrHost;
-                ++c;
+                advance(c);
                 m_url.m_userStart = currentPosition(c);
                 authorityOrHostBegin = c;
             } else {
@@ -1305,6 +1357,7 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U
                     auto lastAt = c;
                     auto findLastAt = c;
                     while (!findLastAt.atEnd()) {
+                        LOG(URLParser, "Finding last @: %c", *findLastAt);
                         if (*findLastAt == '@')
                             lastAt = findLastAt;
                         bool isSlash = *findLastAt == '/' || (m_urlIsSpecial && *findLastAt == '\\');
@@ -1342,23 +1395,25 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U
             } while (!c.atEnd());
             break;
         case State::Host:
-            LOG_STATE("Host");
-            if (*c == '/' || *c == '?' || *c == '#') {
-                if (!parseHostAndPort(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) {
-                    failure();
-                    return;
-                }
-                if (*c == '?' || *c == '#') {
-                    syntaxViolation(c);
-                    appendToASCIIBuffer('/');
-                    m_url.m_pathAfterLastSlash = currentPosition(c);
+            do {
+                LOG_STATE("Host");
+                if (*c == '/' || *c == '?' || *c == '#') {
+                    if (!parseHostAndPort(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) {
+                        failure();
+                        return;
+                    }
+                    if (*c == '?' || *c == '#') {
+                        syntaxViolation(c);
+                        appendToASCIIBuffer('/');
+                        m_url.m_pathAfterLastSlash = currentPosition(c);
+                    }
+                    state = State::Path;
+                    break;
                 }
-                state = State::Path;
-                break;
-            }
-            if (isPercentOrNonASCII(*c))
-                m_hostHasPercentOrNonASCII = true;
-            ++c;
+                if (isPercentOrNonASCII(*c))
+                    m_hostHasPercentOrNonASCII = true;
+                ++c;
+            } while (!c.atEnd());
             break;
         case State::File:
             LOG_STATE("File");
@@ -1426,8 +1481,8 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U
             if (LIKELY(*c == '/' || *c == '\\')) {
                 if (UNLIKELY(*c == '\\'))
                     syntaxViolation(c);
-                ++c;
                 appendToASCIIBuffer('/');
+                advance(c);
                 m_url.m_userStart = currentPosition(c);
                 m_url.m_userEnd = m_url.m_userStart;
                 m_url.m_passwordEnd = m_url.m_userStart;
@@ -1463,55 +1518,57 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U
             state = State::Path;
             break;
         case State::FileHost:
-            LOG_STATE("FileHost");
-            if (isSlashQuestionOrHash(*c)) {
-                bool windowsQuirk = c.codeUnitsSince(authorityOrHostBegin) == 2 && isWindowsDriveLetter(authorityOrHostBegin);
-                if (windowsQuirk) {
-                    syntaxViolation(authorityOrHostBegin);
-                    appendToASCIIBuffer('/');
-                    appendWindowsDriveLetter(authorityOrHostBegin);
-                }
-                if (windowsQuirk || authorityOrHostBegin == c) {
-                    ASSERT(windowsQuirk || parsedDataView(currentPosition(c) - 1, 1) == "/");
-                    if (UNLIKELY(*c == '?')) {
-                        syntaxViolation(c);
-                        appendToASCIIBuffer("/?", 2);
-                        ++c;
-                        m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
-                        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
-                        state = State::Query;
+            do {
+                LOG_STATE("FileHost");
+                if (isSlashQuestionOrHash(*c)) {
+                    bool windowsQuirk = c.codeUnitsSince(authorityOrHostBegin) == 2 && isWindowsDriveLetter(authorityOrHostBegin);
+                    if (windowsQuirk) {
+                        syntaxViolation(authorityOrHostBegin);
+                        appendToASCIIBuffer('/');
+                        appendWindowsDriveLetter(authorityOrHostBegin);
+                    }
+                    if (windowsQuirk || authorityOrHostBegin == c) {
+                        ASSERT(windowsQuirk || parsedDataView(currentPosition(c) - 1, 1) == "/");
+                        if (UNLIKELY(*c == '?')) {
+                            syntaxViolation(c);
+                            appendToASCIIBuffer("/?", 2);
+                            ++c;
+                            m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
+                            m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+                            state = State::Query;
+                            break;
+                        }
+                        if (UNLIKELY(*c == '#')) {
+                            syntaxViolation(c);
+                            appendToASCIIBuffer("/#", 2);
+                            ++c;
+                            m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
+                            m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+                            m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
+                            state = State::Fragment;
+                            break;
+                        }
+                        state = State::Path;
                         break;
                     }
-                    if (UNLIKELY(*c == '#')) {
+                    if (!parseHostAndPort(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) {
+                        failure();
+                        return;
+                    }
+                    if (UNLIKELY(isLocalhost(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd)))) {
                         syntaxViolation(c);
-                        appendToASCIIBuffer("/#", 2);
-                        ++c;
-                        m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
-                        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
-                        m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
-                        state = State::Fragment;
-                        break;
+                        m_asciiBuffer.shrink(m_url.m_passwordEnd);
+                        m_url.m_hostEnd = currentPosition(c);
+                        m_url.m_portEnd = m_url.m_hostEnd;
                     }
-                    state = State::Path;
+                    
+                    state = State::PathStart;
                     break;
                 }
-                if (!parseHostAndPort(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) {
-                    failure();
-                    return;
-                }
-                if (UNLIKELY(equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost"))) {
-                    syntaxViolation(c);
-                    m_asciiBuffer.shrink(m_url.m_passwordEnd);
-                    m_url.m_hostEnd = currentPosition(c);
-                    m_url.m_portEnd = m_url.m_hostEnd;
-                }
-                
-                state = State::PathStart;
-                break;
-            }
-            if (isPercentOrNonASCII(*c))
-                m_hostHasPercentOrNonASCII = true;
-            ++c;
+                if (isPercentOrNonASCII(*c))
+                    m_hostHasPercentOrNonASCII = true;
+                ++c;
+            } while (!c.atEnd());
             break;
         case State::PathStart:
             LOG_STATE("PathStart");
@@ -1553,16 +1610,15 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U
                 state = State::Fragment;
                 break;
             }
-            if (isPercentEncodedDot(c)) {
-                if (UNLIKELY(*c != '.'))
-                    syntaxViolation(c);
+            if (UNLIKELY(isPercentEncodedDot(c))) {
+                syntaxViolation(c);
                 appendToASCIIBuffer('.');
                 ASSERT(*c == '%');
-                ++c;
+                advance(c);
                 ASSERT(*c == dotASCIICode[0]);
-                ++c;
+                advance(c);
                 ASSERT(toASCIILower(*c) == dotASCIICode[1]);
-                ++c;
+                advance(c);
                 break;
             }
             utf8PercentEncode<isInDefaultEncodeSet>(c);
@@ -1777,7 +1833,7 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U
         }
 
         syntaxViolation(c);
-        if (equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost")) {
+        if (isLocalhost(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd))) {
             m_asciiBuffer.shrink(m_url.m_passwordEnd);
             m_url.m_hostEnd = currentPosition(c);
             m_url.m_portEnd = m_url.m_hostEnd;
@@ -1835,7 +1891,6 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U
     }
     m_url.m_isValid = true;
     LOG(URLParser, "Parsed URL <%s>", m_url.m_string.utf8().data());
-    ASSERT(internalValuesConsistent(m_url));
 }
 
 template<typename CharacterType>
index 42bac46..268f36e 100644 (file)
@@ -66,12 +66,23 @@ private:
     template<typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&);
 
     void failure();
-    template<typename CharacterType> void advance(CodePointIterator<CharacterType>& iterator) { advance(iterator, iterator); }
-    template<typename CharacterType> void advance(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
+    enum class ReportSyntaxViolation { No, Yes };
+    template<typename CharacterType, ReportSyntaxViolation reportSyntaxViolation = ReportSyntaxViolation::Yes>
+    void advance(CodePointIterator<CharacterType>& iterator) { advance<CharacterType, reportSyntaxViolation>(iterator, iterator); }
+    template<typename CharacterType, ReportSyntaxViolation = ReportSyntaxViolation::Yes>
+    void advance(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
     template<typename CharacterType> void syntaxViolation(const CodePointIterator<CharacterType>&);
     template<typename CharacterType> void fragmentSyntaxViolation(const CodePointIterator<CharacterType>&);
+    template<typename CharacterType> bool isPercentEncodedDot(CodePointIterator<CharacterType>);
     template<typename CharacterType> bool isWindowsDriveLetter(CodePointIterator<CharacterType>);
+    template<typename CharacterType> bool isSingleDotPathSegment(CodePointIterator<CharacterType>);
+    template<typename CharacterType> bool isDoubleDotPathSegment(CodePointIterator<CharacterType>);
     template<typename CharacterType> bool shouldCopyFileURL(CodePointIterator<CharacterType>);
+    template<typename CharacterType> bool checkLocalhostCodePoint(CodePointIterator<CharacterType>&, UChar32);
+    template<typename CharacterType> bool isAtLocalhost(CodePointIterator<CharacterType>);
+    bool isLocalhost(StringView);
+    template<typename CharacterType> void consumeSingleDotPathSegment(CodePointIterator<CharacterType>&);
+    template<typename CharacterType> void consumeDoubleDotPathSegment(CodePointIterator<CharacterType>&);
     template<typename CharacterType> void appendWindowsDriveLetter(CodePointIterator<CharacterType>&);
     template<typename CharacterType> size_t currentPosition(const CodePointIterator<CharacterType>&);
     template<typename UnsignedIntegerType> void appendNumberToASCIIBuffer(UnsignedIntegerType);
@@ -92,7 +103,7 @@ private:
     using IPv6Address = std::array<uint16_t, 8>;
     template<typename CharacterType> Optional<IPv6Address> parseIPv6Host(CodePointIterator<CharacterType>);
     void serializeIPv6Piece(uint16_t piece);
-    void serializeIPv6(URLParser::IPv6Address);
+    void serializeIPv6(IPv6Address);
 
     enum class URLPart;
     template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&);
index 98117ec..05f7fb4 100644 (file)
@@ -1,3 +1,15 @@
+2016-09-29  Alex Christensen  <achristensen@webkit.org>
+
+        URLParser should ignore tabs at all possible locations
+        https://bugs.webkit.org/show_bug.cgi?id=162711
+
+        Reviewed by Tim Horton.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::ExpectedParts::isInvalid):
+        (TestWebKitAPI::checkURL):
+        (TestWebKitAPI::TEST_F):
+
 2016-09-29  Gyuyoung Kim  <gyuyoung.kim@navercorp.com>
 
         [EFL] Add search button to url bar in MiniBrowser
index 0ebf9c3..3887f5e 100644 (file)
@@ -49,6 +49,18 @@ struct ExpectedParts {
     String query;
     String fragment;
     String string;
+
+    bool isInvalid() const
+    {
+        return protocol.isEmpty()
+            && user.isEmpty()
+            && password.isEmpty()
+            && host.isEmpty()
+            && !port
+            && path.isEmpty()
+            && query.isEmpty()
+            && fragment.isEmpty();
+    }
 };
 
 static bool eq(const String& s1, const String& s2)
@@ -57,7 +69,7 @@ static bool eq(const String& s1, const String& s2)
     return s1.utf8() == s2.utf8();
 }
 
-static void checkURL(const String& urlString, const ExpectedParts& parts)
+static void checkURL(const String& urlString, const ExpectedParts& parts, bool checkTabs = true)
 {
     bool wasEnabled = URLParser::enabled();
     URLParser::setEnabled(true);
@@ -89,6 +101,14 @@ static void checkURL(const String& urlString, const ExpectedParts& parts)
     EXPECT_TRUE(URLParser::allValuesEqual(url, oldURL));
     EXPECT_TRUE(URLParser::internalValuesConsistent(url));
     EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
+
+    if (checkTabs) {
+        for (size_t i = 0; i < urlString.length(); ++i) {
+            String urlStringWithTab = makeString(urlString.substring(0, i), "\t", urlString.substring(i));
+            ExpectedParts invalidPartsWithTab = {"", "", "", "", 0, "" , "", "", urlStringWithTab};
+            checkURL(urlStringWithTab, parts.isInvalid() ? invalidPartsWithTab : parts, false);
+        }
+    }
 }
 
 template<size_t length>
@@ -275,7 +295,7 @@ TEST_F(URLParserTest, Basic)
     checkURL("http://:@host", {"http", "", "", "host", 0, "/", "", "", "http://host/"});
 }
 
-static void checkRelativeURL(const String& urlString, const String& baseURLString, const ExpectedParts& parts)
+static void checkRelativeURL(const String& urlString, const String& baseURLString, const ExpectedParts& parts, bool checkTabs = true)
 {
     bool wasEnabled = URLParser::enabled();
     URLParser::setEnabled(true);
@@ -307,6 +327,14 @@ static void checkRelativeURL(const String& urlString, const String& baseURLStrin
     EXPECT_TRUE(URLParser::allValuesEqual(url, oldURL));
     EXPECT_TRUE(URLParser::internalValuesConsistent(url));
     EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
+    
+    if (checkTabs) {
+        for (size_t i = 0; i < urlString.length(); ++i) {
+            String urlStringWithTab = makeString(urlString.substring(0, i), "\t", urlString.substring(i));
+            ExpectedParts invalidPartsWithTab = {"", "", "", "", 0, "" , "", "", urlStringWithTab};
+            checkRelativeURL(urlStringWithTab, baseURLString, parts.isInvalid() ? invalidPartsWithTab : parts, false);
+        }
+    }
 }
 
 TEST_F(URLParserTest, ParseRelative)
@@ -403,6 +431,8 @@ static void checkURLDifferences(const String& urlString, const ExpectedParts& pa
     EXPECT_FALSE(URLParser::allValuesEqual(url, oldURL));
     EXPECT_TRUE(URLParser::internalValuesConsistent(url));
     EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
+    
+    // FIXME: check tabs here like we do for checkURL and checkRelativeURL.
 }
 
 static void checkRelativeURLDifferences(const String& urlString, const String& baseURLString, const ExpectedParts& partsNew, const ExpectedParts& partsOld)
@@ -437,6 +467,8 @@ static void checkRelativeURLDifferences(const String& urlString, const String& b
     EXPECT_FALSE(URLParser::allValuesEqual(url, oldURL));
     EXPECT_TRUE(URLParser::internalValuesConsistent(url));
     EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
+
+    // FIXME: check tabs here like we do for checkURL and checkRelativeURL.
 }
 
 // These are differences between the new URLParser and the old URL::parse which make URLParser more standards compliant.
@@ -859,13 +891,13 @@ TEST_F(URLParserTest, AdditionalTests)
         {"ws", "", "", "", 0, "", "", "", "ws:"},
         {"ws", "", "", "", 0, "s:", "", "", "ws:s:"});
     checkRelativeURL("notspecial:", "http://example.org/foo/bar", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
-    
+
     const wchar_t surrogateBegin = 0xD800;
     const wchar_t validSurrogateEnd = 0xDD55;
     const wchar_t invalidSurrogateEnd = 'A';
     checkURL(utf16String<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, validSurrogateEnd, '\0'}),
-        {"http", "", "", "w", 0, "/%F0%90%85%95", "", "", "http://w/%F0%90%85%95"});
-    
+        {"http", "", "", "w", 0, "/%F0%90%85%95", "", "", "http://w/%F0%90%85%95"}, false);
+
     // URLParser matches Chrome and Firefox but not URL::parse.
     checkURLDifferences(utf16String<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, invalidSurrogateEnd}),
         {"http", "", "", "w", 0, "/%EF%BF%BDA", "", "", "http://w/%EF%BF%BDA"},
@@ -897,6 +929,8 @@ static void checkURL(const String& urlString, const TextEncoding& encoding, cons
     EXPECT_TRUE(eq(parts.query, url.query()));
     EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier()));
     EXPECT_TRUE(eq(parts.string, url.string()));
+
+    // FIXME: check tabs here like we do for checkURL and checkRelativeURL.
 }
 
 TEST_F(URLParserTest, QueryEncoding)