[Content Extensions] Test regular expression parse failures.
author achristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 19 Mar 2015 22:37:35 +0000 (22:37 +0000)
committer achristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 19 Mar 2015 22:37:35 +0000 (22:37 +0000)
https://bugs.webkit.org/show_bug.cgi?id=142872

Reviewed by Benjamin Poulain.

Source/WebCore:

* contentextensions/URLFilterParser.cpp:
(WebCore::ContentExtensions::GraphBuilder::finalize):
(WebCore::ContentExtensions::GraphBuilder::quantifyAtom):
(WebCore::ContentExtensions::URLFilterParser::addPattern):
(WebCore::ContentExtensions::URLFilterParser::statusString):
* contentextensions/URLFilterParser.h:
Changed failures that can never happen to assertions.

Tools:

* TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
(TestWebKitAPI::testPatternStatus):
(TestWebKitAPI::TEST_F):
Added tests for each parsing status.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@181762 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/contentextensions/URLFilterParser.cpp
Source/WebCore/contentextensions/URLFilterParser.h
Tools/ChangeLog
Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp

index 1661e6b..abd8a04 100644 (file)
@@ -1,3 +1,18 @@
+2015-03-19  Alex Christensen  <achristensen@webkit.org>
+
+        [Content Extensions] Test regular expression parse failures.
+        https://bugs.webkit.org/show_bug.cgi?id=142872
+
+        Reviewed by Benjamin Poulain.
+
+        * contentextensions/URLFilterParser.cpp:
+        (WebCore::ContentExtensions::GraphBuilder::finalize):
+        (WebCore::ContentExtensions::GraphBuilder::quantifyAtom):
+        (WebCore::ContentExtensions::URLFilterParser::addPattern):
+        (WebCore::ContentExtensions::URLFilterParser::statusString):
+        * contentextensions/URLFilterParser.h:
+        Changed failures that can never happen to assertions.
+
 2015-03-19  Enrica Casucci  <enrica@apple.com>
 
         <attachment> should put URLs on the pasteboard so that Finder can accept drops.
index 26fec83..a488f7a 100644 (file)
@@ -477,15 +477,10 @@ public:
             m_subtreeEnd = m_lastPrefixTreeEntry->nfaNode;
         }
         
-        if (!m_openGroups.isEmpty()) {
-            fail(URLFilterParser::UnclosedGroups);
-            return;
-        }
-
-        if (m_subtreeStart != m_subtreeEnd)
-            m_nfa.setFinal(m_subtreeEnd, m_patternId);
-        else
-            fail(URLFilterParser::CannotMatchAnything);
+        ASSERT_WITH_MESSAGE(m_openGroups.isEmpty(), "An unclosed group should be a parsing error in YARR.");
+        ASSERT_WITH_MESSAGE(m_subtreeStart != m_subtreeEnd, "This regex cannot match anything");
+        
+        m_nfa.setFinal(m_subtreeEnd, m_patternId);
     }
 
     URLFilterParser::ParseStatus parseStatus() const
@@ -529,8 +524,7 @@ public:
         if (hasError())
             return;
 
-        if (!m_floatingTerm.isValid())
-            fail(URLFilterParser::MisplacedQuantifier);
+        ASSERT(m_floatingTerm.isValid());
 
         if (!minimum && maximum == 1)
             m_floatingTerm.quantify(AtomQuantifier::ZeroOrOne);
@@ -723,8 +717,6 @@ URLFilterParser::ParseStatus URLFilterParser::addPattern(const String& pattern,
 {
     if (!pattern.containsOnlyASCII())
         return NonASCII;
-    ASSERT(!pattern.isEmpty());
-
     if (pattern.isEmpty())
         return EmptyPattern;
 
@@ -754,16 +746,10 @@ String URLFilterParser::statusString(ParseStatus status)
         return "Ok";
     case MatchesEverything:
         return "Matches everything.";
-    case UnclosedGroups:
-        return "The expression has unclosed groups.";
-    case CannotMatchAnything:
-        return "The pattern cannot match anything.";
     case NonASCII:
         return "Only ASCII characters are supported in pattern.";
     case UnsupportedCharacterClass:
         return "Character class is not supported.";
-    case MisplacedQuantifier:
-        return "Quantifier without corresponding term to quantify.";
     case BackReference:
         return "Patterns cannot contain backreferences.";
     case MisplacedStartOfLine:
index 81501ea..08239ad 100644 (file)
@@ -44,11 +44,8 @@ public:
     enum ParseStatus {
         Ok,
         MatchesEverything,
-        UnclosedGroups,
-        CannotMatchAnything,
         NonASCII,
         UnsupportedCharacterClass,
-        MisplacedQuantifier,
         BackReference,
         MisplacedStartOfLine,
         WordBoundary,
index 0a47746..17698ca 100644 (file)
@@ -1,5 +1,17 @@
 2015-03-19  Alex Christensen  <achristensen@webkit.org>
 
+        [Content Extensions] Test regular expression parse failures.
+        https://bugs.webkit.org/show_bug.cgi?id=142872
+
+        Reviewed by Benjamin Poulain.
+
+        * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
+        (TestWebKitAPI::testPatternStatus):
+        (TestWebKitAPI::TEST_F):
+        Added tests for each parsing status.
+
+2015-03-19  Alex Christensen  <achristensen@webkit.org>
+
         Fix API tests after r181754.
 
         * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
index 10e114a..e9f6499 100644 (file)
@@ -341,11 +341,11 @@ TEST_F(ContentExtensionTest, ResourceType)
     testRequest(backend, mainDocumentRequest("http://block_only_images.org", ResourceType::Document), { });
 }
 
-static void testPatternStatus(const char* pattern, ContentExtensions::URLFilterParser::ParseStatus status)
+static void testPatternStatus(String pattern, ContentExtensions::URLFilterParser::ParseStatus status)
 {
     ContentExtensions::NFA nfa;
     ContentExtensions::URLFilterParser parser(nfa);
-    EXPECT_EQ(status, parser.addPattern(ASCIILiteral(pattern), false, 0));
+    EXPECT_EQ(status, parser.addPattern(pattern, false, 0));
 }
     
 TEST_F(ContentExtensionTest, ParsingFailures)
@@ -353,7 +353,55 @@ TEST_F(ContentExtensionTest, ParsingFailures)
     testPatternStatus("a*b?.*.?[a-z]?[a-z]*", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
     testPatternStatus("a*b?.*.?[a-z]?[a-z]+", ContentExtensions::URLFilterParser::ParseStatus::Ok);
     testPatternStatus("a*b?.*.?[a-z]?[a-z]", ContentExtensions::URLFilterParser::ParseStatus::Ok);
-    // FIXME: Add regexes that cause each parse status.
+    testPatternStatus(".*?a", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus(".*a", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    
+    testPatternStatus("(?!)", ContentExtensions::URLFilterParser::ParseStatus::Group);
+    testPatternStatus("(?=)", ContentExtensions::URLFilterParser::ParseStatus::Group);
+    testPatternStatus("(?!a)", ContentExtensions::URLFilterParser::ParseStatus::Group);
+    testPatternStatus("(?=a)", ContentExtensions::URLFilterParser::ParseStatus::Group);
+    testPatternStatus("(regex)", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("(regex", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
+    testPatternStatus("((regex)", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
+    testPatternStatus("(?:regex)", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("(?:regex", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
+    testPatternStatus("[^.]+", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    
+    testPatternStatus("a++", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
+    testPatternStatus("[a]++", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
+    testPatternStatus("+", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
+    
+    testPatternStatus("[", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
+    testPatternStatus("[a}", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
+    
+    // FIXME: Look into why these do not cause YARR parsing errors.  They probably should.
+    testPatternStatus("a]", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("{", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("{[a]", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("{0", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("{0,", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("{0,1", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("a{0,1", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("a{a,b}", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+
+    const char nonASCII[2] = {-1, '\0'};
+    testPatternStatus(nonASCII, ContentExtensions::URLFilterParser::ParseStatus::NonASCII);
+    testPatternStatus("\\xff", ContentExtensions::URLFilterParser::ParseStatus::NonASCII);
+    
+    testPatternStatus("\\x\\r\\n", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("\\b", ContentExtensions::URLFilterParser::ParseStatus::WordBoundary);
+    testPatternStatus("[\\d]", ContentExtensions::URLFilterParser::ParseStatus::AtomCharacter);
+    testPatternStatus("\\d\\D\\w\\s\\v\\h\\i\\c", ContentExtensions::URLFilterParser::ParseStatus::UnsupportedCharacterClass);
+    
+    testPatternStatus("this|that", ContentExtensions::URLFilterParser::ParseStatus::Disjunction);
+    testPatternStatus("a{0,1}b", ContentExtensions::URLFilterParser::ParseStatus::Ok);
+    testPatternStatus("a{0,2}b", ContentExtensions::URLFilterParser::ParseStatus::InvalidQuantifier);
+    testPatternStatus("", ContentExtensions::URLFilterParser::ParseStatus::EmptyPattern);
+    testPatternStatus("$$", ContentExtensions::URLFilterParser::ParseStatus::MisplacedEndOfLine);
+    testPatternStatus("a^", ContentExtensions::URLFilterParser::ParseStatus::MisplacedStartOfLine);
+    testPatternStatus("(^)", ContentExtensions::URLFilterParser::ParseStatus::MisplacedStartOfLine);
+    
+    testPatternStatus("(a)\\1", ContentExtensions::URLFilterParser::ParseStatus::Ok); // This should be BackReference, right?
 }
 
 } // namespace TestWebKitAPI