Remove unnecessary includes of wtf/Platform.h. This is already pulled in by config.h.
[WebKit-https.git] / JavaScriptCore / yarr / RegexPattern.h
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #ifndef RegexPattern_h
27 #define RegexPattern_h
28
29
30 #if ENABLE(YARR)
31
32 #include <wtf/Vector.h>
33 #include <wtf/unicode/Unicode.h>
34
35
36 namespace JSC { namespace Yarr {
37
38 #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
39 #define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
40 #define RegexStackSpaceForBackTrackInfoBackReference 2
41 #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
42 #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
43 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
44 #define RegexStackSpaceForBackTrackInfoParentheses 4
45
46 struct PatternDisjunction;
47
48 struct CharacterRange {
49     UChar begin;
50     UChar end;
51
52     CharacterRange(UChar begin, UChar end)
53         : begin(begin)
54         , end(end)
55     {
56     }
57 };
58
59 struct CharacterClass : FastAllocBase {
60     Vector<UChar> m_matches;
61     Vector<CharacterRange> m_ranges;
62     Vector<UChar> m_matchesUnicode;
63     Vector<CharacterRange> m_rangesUnicode;
64 };
65
66 enum QuantifierType {
67     QuantifierFixedCount,
68     QuantifierGreedy,
69     QuantifierNonGreedy,
70 };
71
72 struct PatternTerm {
73     enum Type {
74         TypeAssertionBOL,
75         TypeAssertionEOL,
76         TypeAssertionWordBoundary,
77         TypePatternCharacter,
78         TypeCharacterClass,
79         TypeBackReference,
80         TypeForwardReference,
81         TypeParenthesesSubpattern,
82         TypeParentheticalAssertion,
83     } type;
84     bool invertOrCapture;
85     union {
86         UChar patternCharacter;
87         CharacterClass* characterClass;
88         unsigned subpatternId;
89         struct {
90             PatternDisjunction* disjunction;
91             unsigned subpatternId;
92             unsigned lastSubpatternId;
93             bool isCopy;
94         } parentheses;
95     };
96     QuantifierType quantityType;
97     unsigned quantityCount;
98     int inputPosition;
99     unsigned frameLocation;
100
101     PatternTerm(UChar ch)
102         : type(PatternTerm::TypePatternCharacter)
103     {
104         patternCharacter = ch;
105         quantityType = QuantifierFixedCount;
106         quantityCount = 1;
107     }
108
109     PatternTerm(CharacterClass* charClass, bool invert)
110         : type(PatternTerm::TypeCharacterClass)
111         , invertOrCapture(invert)
112     {
113         characterClass = charClass;
114         quantityType = QuantifierFixedCount;
115         quantityCount = 1;
116     }
117
118     PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture)
119         : type(type)
120         , invertOrCapture(invertOrCapture)
121     {
122         parentheses.disjunction = disjunction;
123         parentheses.subpatternId = subpatternId;
124         parentheses.isCopy = false;
125         quantityType = QuantifierFixedCount;
126         quantityCount = 1;
127     }
128     
129     PatternTerm(Type type, bool invert = false)
130         : type(type)
131         , invertOrCapture(invert)
132     {
133         quantityType = QuantifierFixedCount;
134         quantityCount = 1;
135     }
136
137     PatternTerm(unsigned spatternId)
138         : type(TypeBackReference)
139         , invertOrCapture(false)
140     {
141         subpatternId = spatternId;
142         quantityType = QuantifierFixedCount;
143         quantityCount = 1;
144     }
145
146     static PatternTerm ForwardReference()
147     {
148         return PatternTerm(TypeForwardReference);
149     }
150
151     static PatternTerm BOL()
152     {
153         return PatternTerm(TypeAssertionBOL);
154     }
155
156     static PatternTerm EOL()
157     {
158         return PatternTerm(TypeAssertionEOL);
159     }
160
161     static PatternTerm WordBoundary(bool invert)
162     {
163         return PatternTerm(TypeAssertionWordBoundary, invert);
164     }
165     
166     bool invert()
167     {
168         return invertOrCapture;
169     }
170
171     bool capture()
172     {
173         return invertOrCapture;
174     }
175     
176     void quantify(unsigned count, QuantifierType type)
177     {
178         quantityCount = count;
179         quantityType = type;
180     }
181 };
182
183 struct PatternAlternative : FastAllocBase {
184     PatternAlternative(PatternDisjunction* disjunction)
185         : m_parent(disjunction)
186     {
187     }
188
189     PatternTerm& lastTerm()
190     {
191         ASSERT(m_terms.size());
192         return m_terms[m_terms.size() - 1];
193     }
194     
195     void removeLastTerm()
196     {
197         ASSERT(m_terms.size());
198         m_terms.shrink(m_terms.size() - 1);
199     }
200
201     Vector<PatternTerm> m_terms;
202     PatternDisjunction* m_parent;
203     unsigned m_minimumSize;
204     bool m_hasFixedSize;
205 };
206
207 struct PatternDisjunction : FastAllocBase {
208     PatternDisjunction(PatternAlternative* parent = 0)
209         : m_parent(parent)
210     {
211     }
212     
213     ~PatternDisjunction()
214     {
215         deleteAllValues(m_alternatives);
216     }
217
218     PatternAlternative* addNewAlternative()
219     {
220         PatternAlternative* alternative = new PatternAlternative(this);
221         m_alternatives.append(alternative);
222         return alternative;
223     }
224
225     Vector<PatternAlternative*> m_alternatives;
226     PatternAlternative* m_parent;
227     unsigned m_minimumSize;
228     unsigned m_callFrameSize;
229     bool m_hasFixedSize;
230 };
231
232 // You probably don't want to be calling these functions directly
233 // (please to be calling newlineCharacterClass() et al on your
234 // friendly neighborhood RegexPattern instance to get nicely
235 // cached copies).
236 CharacterClass* newlineCreate();
237 CharacterClass* digitsCreate();
238 CharacterClass* spacesCreate();
239 CharacterClass* wordcharCreate();
240 CharacterClass* nondigitsCreate();
241 CharacterClass* nonspacesCreate();
242 CharacterClass* nonwordcharCreate();
243
244 struct RegexPattern {
245     RegexPattern(bool ignoreCase, bool multiline)
246         : m_ignoreCase(ignoreCase)
247         , m_multiline(multiline)
248         , m_numSubpatterns(0)
249         , m_maxBackReference(0)
250         , newlineCached(0)
251         , digitsCached(0)
252         , spacesCached(0)
253         , wordcharCached(0)
254         , nondigitsCached(0)
255         , nonspacesCached(0)
256         , nonwordcharCached(0)
257     {
258     }
259
260     ~RegexPattern()
261     {
262         deleteAllValues(m_disjunctions);
263         deleteAllValues(m_userCharacterClasses);
264     }
265
266     void reset()
267     {
268         m_numSubpatterns = 0;
269         m_maxBackReference = 0;
270
271         newlineCached = 0;
272         digitsCached = 0;
273         spacesCached = 0;
274         wordcharCached = 0;
275         nondigitsCached = 0;
276         nonspacesCached = 0;
277         nonwordcharCached = 0;
278
279         deleteAllValues(m_disjunctions);
280         m_disjunctions.clear();
281         deleteAllValues(m_userCharacterClasses);
282         m_userCharacterClasses.clear();
283     }
284
285     bool containsIllegalBackReference()
286     {
287         return m_maxBackReference > m_numSubpatterns;
288     }
289
290     CharacterClass* newlineCharacterClass()
291     {
292         if (!newlineCached)
293             m_userCharacterClasses.append(newlineCached = newlineCreate());
294         return newlineCached;
295     }
296     CharacterClass* digitsCharacterClass()
297     {
298         if (!digitsCached)
299             m_userCharacterClasses.append(digitsCached = digitsCreate());
300         return digitsCached;
301     }
302     CharacterClass* spacesCharacterClass()
303     {
304         if (!spacesCached)
305             m_userCharacterClasses.append(spacesCached = spacesCreate());
306         return spacesCached;
307     }
308     CharacterClass* wordcharCharacterClass()
309     {
310         if (!wordcharCached)
311             m_userCharacterClasses.append(wordcharCached = wordcharCreate());
312         return wordcharCached;
313     }
314     CharacterClass* nondigitsCharacterClass()
315     {
316         if (!nondigitsCached)
317             m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
318         return nondigitsCached;
319     }
320     CharacterClass* nonspacesCharacterClass()
321     {
322         if (!nonspacesCached)
323             m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
324         return nonspacesCached;
325     }
326     CharacterClass* nonwordcharCharacterClass()
327     {
328         if (!nonwordcharCached)
329             m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
330         return nonwordcharCached;
331     }
332
333     bool m_ignoreCase;
334     bool m_multiline;
335     unsigned m_numSubpatterns;
336     unsigned m_maxBackReference;
337     PatternDisjunction* m_body;
338     Vector<PatternDisjunction*, 4> m_disjunctions;
339     Vector<CharacterClass*> m_userCharacterClasses;
340
341 private:
342     CharacterClass* newlineCached;
343     CharacterClass* digitsCached;
344     CharacterClass* spacesCached;
345     CharacterClass* wordcharCached;
346     CharacterClass* nondigitsCached;
347     CharacterClass* nonspacesCached;
348     CharacterClass* nonwordcharCached;
349 };
350
351 } } // namespace JSC::Yarr
352
353 #endif
354
355 #endif // RegexPattern_h