2 * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
27 #include "URLFilterParser.h"
29 #if ENABLE(CONTENT_EXTENSIONS)
32 #include <JavaScriptCore/YarrParser.h>
36 namespace ContentExtensions {
40 struct BoundedSubGraph {
45 GraphBuilder(NFA& nfa, uint64_t patternId)
47 , m_patternId(patternId)
48 , m_activeGroup({ nfa.root(), nfa.root() })
49 , m_lastAtom(m_activeGroup)
57 if (m_activeGroup.start != m_activeGroup.end)
58 m_nfa.setFinal(m_activeGroup.end);
60 fail(ASCIILiteral("The pattern cannot match anything."));
63 const String& errorMessage() const
65 return m_errorMessage;
68 void atomPatternCharacter(UChar character)
70 if (isASCII(character)) {
71 fail(ASCIILiteral("Only ASCII characters are supported in pattern."));
78 m_hasValidAtom = true;
79 unsigned newEnd = m_nfa.createNode(m_patternId);
80 m_nfa.addTransition(m_lastAtom.end, newEnd, static_cast<char>(character));
81 m_lastAtom.start = m_lastAtom.end;
82 m_lastAtom.end = newEnd;
83 m_activeGroup.end = m_lastAtom.end;
86 void atomBuiltInCharacterClass(JSC::Yarr::BuiltInCharacterClassID builtInCharacterClassID, bool inverted)
91 if (builtInCharacterClassID == JSC::Yarr::NewlineClassID && inverted) {
92 // FIXME: handle new line properly.
93 m_hasValidAtom = true;
94 unsigned newEnd = m_nfa.createNode(m_patternId);
95 for (unsigned i = 1; i < 128; ++i)
96 m_nfa.addTransition(m_lastAtom.end, newEnd, i);
97 m_lastAtom.start = m_lastAtom.end;
98 m_lastAtom.end = newEnd;
99 m_activeGroup.end = m_lastAtom.end;
101 fail(ASCIILiteral("Character class is not supported."));
104 void quantifyAtom(unsigned minimum, unsigned maximum, bool)
109 ASSERT(m_hasValidAtom);
110 if (!m_hasValidAtom) {
111 fail(ASCIILiteral("Quantifier without corresponding atom to quantify."));
115 if (!minimum && maximum == 1)
116 m_nfa.addEpsilonTransition(m_lastAtom.start, m_lastAtom.end);
117 else if (!minimum && maximum == JSC::Yarr::quantifyInfinite) {
118 m_nfa.addEpsilonTransition(m_lastAtom.start, m_lastAtom.end);
119 m_nfa.addEpsilonTransition(m_lastAtom.end, m_lastAtom.start);
120 } else if (minimum == 1 && maximum == JSC::Yarr::quantifyInfinite)
121 m_nfa.addEpsilonTransition(m_lastAtom.end, m_lastAtom.start);
123 fail(ASCIILiteral("Arbitrary atom repetitions are not supported."));
126 NO_RETURN_DUE_TO_ASSERT void atomBackReference(unsigned)
128 fail(ASCIILiteral("Patterns cannot contain backreferences."));
129 ASSERT_NOT_REACHED();
132 void atomCharacterClassAtom(UChar)
134 fail(ASCIILiteral("Character class atoms are not supported yet."));
139 fail(ASCIILiteral("Line boundary assertions are not supported yet."));
144 fail(ASCIILiteral("Line boundary assertions are not supported yet."));
147 void assertionWordBoundary(bool)
149 fail(ASCIILiteral("Word boundaries assertions are not supported yet."));
152 void atomCharacterClassBegin(bool = false)
154 fail(ASCIILiteral("Character class atoms are not supported yet."));
157 void atomCharacterClassRange(UChar, UChar)
159 fail(ASCIILiteral("Character class ranges are not supported yet."));
162 void atomCharacterClassBuiltIn(JSC::Yarr::BuiltInCharacterClassID, bool)
164 fail(ASCIILiteral("Buildins character class atoms are not supported yet."));
167 void atomCharacterClassEnd()
169 fail(ASCIILiteral("Character class are not supported yet."));
172 void atomParenthesesSubpatternBegin(bool = true)
174 fail(ASCIILiteral("Groups are not supported yet."));
177 void atomParentheticalAssertionBegin(bool = false)
179 fail(ASCIILiteral("Groups are not supported yet."));
182 void atomParenthesesEnd()
184 fail(ASCIILiteral("Groups are not supported yet."));
189 fail(ASCIILiteral("Disjunctions are not supported yet."));
194 bool hasError() const
196 return !m_errorMessage.isNull();
199 void fail(const String& errorMessage)
203 m_errorMessage = errorMessage;
207 const uint64_t m_patternId;
209 BoundedSubGraph m_activeGroup;
211 bool m_hasValidAtom = false;
212 BoundedSubGraph m_lastAtom;
214 String m_errorMessage;
217 void URLFilterParser::parse(const String& pattern, uint64_t patternId, NFA& nfa)
219 if (!pattern.containsOnlyASCII())
220 m_errorMessage = ASCIILiteral("URLFilterParser only supports ASCII patterns.");
221 ASSERT(!pattern.isEmpty());
223 if (pattern.isEmpty())
226 unsigned oldSize = nfa.graphSize();
228 GraphBuilder graphBuilder(nfa, patternId);
229 const char* error = JSC::Yarr::parse(graphBuilder, pattern, 0);
231 m_errorMessage = String(error);
233 graphBuilder.finalize();
236 m_errorMessage = graphBuilder.errorMessage();
239 nfa.restoreToGraphSize(oldSize);
242 } // namespace ContentExtensions
243 } // namespace WebCore
245 #endif // ENABLE(CONTENT_EXTENSIONS)