2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Collabora Ltd.
4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "RegularExpression.h"

#include <wtf/BumpPointerAllocator.h>
#include <wtf/Vector.h>

#include "yarr/Yarr.h"
37 class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
39 static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity)
41 return adoptRef(new Private(pattern, caseSensitivity));
46 unsigned m_numSubpatterns;
47 OwnPtr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
50 Private(const String& pattern, TextCaseSensitivity caseSensitivity)
52 , m_regExpByteCode(compile(pattern, caseSensitivity))
53 , m_constructionError(0)
57 PassOwnPtr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity)
59 JSC::Yarr::YarrPattern pattern(JSC::UString(patternString.impl()), (caseSensitivity == TextCaseInsensitive), false, &m_constructionError);
60 if (m_constructionError) {
61 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError);
62 return PassOwnPtr<JSC::Yarr::BytecodePattern>();
65 m_numSubpatterns = pattern.m_numSubpatterns;
67 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
70 BumpPointerAllocator m_regexAllocator;
71 const char* m_constructionError;
74 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity)
75 : d(Private::create(pattern, caseSensitivity))
79 RegularExpression::RegularExpression(const RegularExpression& re)
84 RegularExpression::~RegularExpression()
88 RegularExpression& RegularExpression::operator=(const RegularExpression& re)
94 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
96 if (!d->m_regExpByteCode)
102 const size_t maxOffsets = (d->m_numSubpatterns + 1) * 2;
103 int offsets[maxOffsets];
105 for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
108 int result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str.characters(), startFrom, str.length(), offsets);
109 ASSERT(result >= -1);
112 d->lastMatchLength = -1;
116 // 1 means 1 match; 0 means more than one match. First match is recorded in offsets.
117 d->lastMatchLength = offsets[1] - offsets[0];
119 *matchLength = d->lastMatchLength;
123 int RegularExpression::searchRev(const String& str) const
125 // FIXME: This could be faster if it actually searched backwards.
126 // Instead, it just searches forwards, multiple times until it finds the last match.
131 int lastMatchLength = -1;
134 pos = match(str, start, &matchLength);
136 if (pos + matchLength > lastPos + lastMatchLength) {
137 // replace last match if this one is later and not a subset of the last match
139 lastMatchLength = matchLength;
144 d->lastMatchLength = lastMatchLength;
148 int RegularExpression::matchedLength() const
150 return d->lastMatchLength;
153 void replace(String& string, const RegularExpression& target, const String& replacement)
156 while (index < static_cast<int>(string.length())) {
158 index = target.match(string, index, &matchLength);
161 string.replace(index, matchLength, replacement);
162 index += replacement.length();
164 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
168 } // namespace WebCore