2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008, 2016 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #ifndef RegExpInlines_h
24 #define RegExpInlines_h
27 #include "JSCInlines.h"
29 #include "YarrInterpreter.h"
32 #define REGEXP_FUNC_TEST_DATA_GEN 0
34 #if REGEXP_FUNC_TEST_DATA_GEN
42 #if REGEXP_FUNC_TEST_DATA_GEN
// Helper that is only declared when REGEXP_FUNC_TEST_DATA_GEN is nonzero. The match
// functions in this header call get()->outputOneTest(...) with the RegExp, the subject
// string, the start offset, the offset vector and the match result.
// NOTE(review): braces, access specifiers and some members of this declaration are not
// visible in this view, so the comments below cover only what can be seen.
43 class RegExpFunctionalTestCollector {
44 // This class is not thread safe.
// Presumably the name of the file the collected tests are written to; the definition
// lives elsewhere -- TODO confirm.
46 static const char* const s_fileName;
// Static accessor for the collector; presumably hands back s_instance -- TODO confirm
// against the out-of-line definition.
49 static RegExpFunctionalTestCollector* get();
51 ~RegExpFunctionalTestCollector();
// Arguments as passed by the callers in this header, in order: the RegExp, the subject
// string, the start offset, the offset vector, and the match result.
53 void outputOneTest(RegExp*, String, int, int*, int);
// Compares the given RegExp against m_lastRegExp; the statement executed when they are
// equal is not visible in this view.
54 void clearRegExp(RegExp* regExp)
56 if (regExp == m_lastRegExp)
61 RegExpFunctionalTestCollector();
// escapeSlash defaults to false.
63 void outputEscapedString(const String&, bool escapeSlash = false);
65 static RegExpFunctionalTestCollector* s_instance;
69 #endif // REGEXP_FUNC_TEST_DATA_GEN
// Judging by its name and bool return type, reports whether this RegExp already has
// compiled code usable for subjects of the given character size.
// The visible checks look at whether m_state is something other than JITCode, and at
// whether the JIT code object holds 8-bit or 16-bit code matching charSize.
// NOTE(review): the return statements and any enclosing guards are not visible in this
// view, so the value produced by each check cannot be confirmed from here.
71 ALWAYS_INLINE bool RegExp::hasCodeFor(Yarr::YarrCharSize charSize)
75 if (m_state != JITCode)
77 if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCode()))
79 if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCode()))
// Compiles this RegExp for the given character size via compile(&vm, charSize).
// The call is preceded by a hasCodeFor(charSize) check; the statement taken when that
// check succeeds is not visible here (presumably an early return -- TODO confirm).
88 ALWAYS_INLINE void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize)
90 if (hasCodeFor(charSize))
93 compile(&vm, charSize);
// Matches this RegExp against `s` starting at `startOffset`, writing subpattern offsets
// into `ovector`, which is resized here.
// NOTE(review): the declaration of `result`, the else branches and the final return
// statement are not visible in this view; presumably `result` is what gets returned.
96 ALWAYS_INLINE int RegExp::matchInline(VM& vm, const String& s, unsigned startOffset, Vector<int, 32>& ovector)
98 #if ENABLE(REGEXP_TRACING)
// Tracing only: accumulate the length of the subject that remains after startOffset.
100 m_rtMatchTotalSubjectStringLen += (double)(s.length() - startOffset);
103 ASSERT(m_state != ParseError);
// Make sure code exists for the character width of this particular subject string.
104 compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
// Two int slots for each of (m_numSubpatterns + 1) entries; the loop further down
// indexes them as pairs [i*2] and [i*2+1].
106 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
107 ovector.resize(offsetVectorSize);
108 int* offsetVector = ovector.data();
// JIT path: run the 8-bit or 16-bit entry point and keep the `start` field of what
// execute() returns.
112 if (m_state == JITCode) {
114 result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector).start;
116 result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector).start;
117 #if ENABLE(YARR_JIT_DEBUG)
// Debug builds only: hand the JIT result to matchCompareWithInterpreter().
118 matchCompareWithInterpreter(s, startOffset, offsetVector, result);
// Interpreter path: runs the bytecode; the int offset vector is passed as unsigned*.
122 result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
124 // FIXME: The YARR engine should handle unsigned or size_t length matches.
125 // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed.
126 // The offset vector handling needs to change as well.
127 // Right now we convert a match where the offsets overflowed into match failure.
128 // There are two places in WebCore that call the interpreter directly that need to
129 // have their offsets changed to int as well. They are yarr/RegularExpression.cpp
130 // and inspector/ContentSearchUtilities.cpp
// Only subjects longer than INT_MAX are checked for overflowed offsets.
131 if (s.length() > INT_MAX) {
132 bool overflowed = false;
// An entry counts as overflowed when its start is below -1, or when its start is
// non-negative but its end is below -1; such an entry is reset to -1/-1.
// NOTE(review): how `overflowed` is set and consumed is not visible in this view.
137 for (unsigned i = 0; i <= m_numSubpatterns; i++) {
138 if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) {
140 offsetVector[i*2] = -1;
141 offsetVector[i*2+1] = -1;
149 ASSERT(result >= -1);
151 #if REGEXP_FUNC_TEST_DATA_GEN
// Test-data generation only: record this match through the collector.
152 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
155 #if ENABLE(REGEXP_TRACING)
// Tracing only: bump the found counter (its guarding condition is not visible here).
157 m_rtMatchFoundCount++;
// Match-only counterpart of hasCodeFor(): judging by its name and bool return type,
// reports whether match-only code already exists for the given character size.
// The visible checks look at whether m_state is something other than JITCode, and at
// whether the JIT code object holds 8-bit or 16-bit match-only code matching charSize.
// NOTE(review): the return statements and any enclosing guards are not visible in this
// view, so the value produced by each check cannot be confirmed from here.
163 ALWAYS_INLINE bool RegExp::hasMatchOnlyCodeFor(Yarr::YarrCharSize charSize)
167 if (m_state != JITCode)
169 if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCodeMatchOnly()))
171 if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCodeMatchOnly()))
// Compiles the match-only form of this RegExp for the given character size via
// compileMatchOnly(&vm, charSize).
// The call is preceded by a hasMatchOnlyCodeFor(charSize) check; the statement taken when
// that check succeeds is not visible here (presumably an early return -- TODO confirm).
181 ALWAYS_INLINE void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize)
183 if (hasMatchOnlyCodeFor(charSize))
186 compileMatchOnly(&vm, charSize);
// Match-only overload: matches this RegExp against `s` starting at `startOffset` and
// yields a MatchResult instead of filling a caller-supplied offset vector.
// NOTE(review): several lines are not visible in this view, including the declaration
// of `offsetVector`, the closing of the JIT branch and the conditions around the
// return statements.
189 ALWAYS_INLINE MatchResult RegExp::matchInline(VM& vm, const String& s, unsigned startOffset)
191 #if ENABLE(REGEXP_TRACING)
// Tracing only: count the call and accumulate the subject length after startOffset.
192 m_rtMatchOnlyCallCount++;
193 m_rtMatchOnlyTotalSubjectStringLen += (double)(s.length() - startOffset);
196 ASSERT(m_state != ParseError);
// Make sure match-only code exists for the character width of this subject string.
197 compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
// JIT path: run the 8-bit or 16-bit entry point without passing an offset vector.
200 if (m_state == JITCode) {
201 MatchResult result = s.is8Bit() ?
202 m_regExpJITCode.execute(s.characters8(), startOffset, s.length()) :
203 m_regExpJITCode.execute(s.characters16(), startOffset, s.length());
204 #if ENABLE(REGEXP_TRACING)
// Tracing only: bump the found counter (its guarding condition is not visible here).
206 m_rtMatchOnlyFoundCount++;
// Interpreter path: it is handed an offset vector, so a local one is sized the same
// way as in the ovector overload: two slots for each of (m_numSubpatterns + 1) entries.
212 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
214 Vector<int, 32> nonReturnedOvector;
215 nonReturnedOvector.resize(offsetVectorSize);
216 offsetVector = nonReturnedOvector.data();
217 int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
218 #if REGEXP_FUNC_TEST_DATA_GEN
// NOTE(review): the last argument is `result`, but the interpreter's return value on
// the line above is stored in `r`. The only `result` visible in this function is the
// MatchResult declared inside the JITCode branch, while outputOneTest() declares its
// last parameter as int. This looks like it should be `r` -- confirm. The line is only
// compiled when REGEXP_FUNC_TEST_DATA_GEN is nonzero.
219 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
223 #if ENABLE(REGEXP_TRACING)
224 m_rtMatchOnlyFoundCount++;
// Builds the result from the interpreter's return value and slot [1] of the offset
// vector, read as unsigned.
226 return MatchResult(r, reinterpret_cast<unsigned*>(offsetVector)[1]);
229 return MatchResult::failed();
234 #endif // RegExpInlines_h