547022f34104fe655140d50b9a508d136c308838
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrInterpreter.cpp
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "YarrInterpreter.h"
29
30 #include "Yarr.h"
31 #include "YarrCanonicalizeUCS2.h"
32 #include <wtf/BumpPointerAllocator.h>
33 #include <wtf/DataLog.h>
34 #include <wtf/text/CString.h>
35 #include <wtf/text/WTFString.h>
36
37 #ifndef NDEBUG
38 #include <stdio.h>
39 #endif
40
41 using namespace WTF;
42
43 namespace JSC { namespace Yarr {
44
45 template<typename CharType>
46 class Interpreter {
47 public:
48     struct ParenthesesDisjunctionContext;
49
50     struct BackTrackInfoPatternCharacter {
51         uintptr_t matchAmount;
52     };
53     struct BackTrackInfoCharacterClass {
54         uintptr_t matchAmount;
55     };
56     struct BackTrackInfoBackReference {
57         uintptr_t begin; // Not really needed for greedy quantifiers.
58         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
59     };
60     struct BackTrackInfoAlternative {
61         uintptr_t offset;
62     };
63     struct BackTrackInfoParentheticalAssertion {
64         uintptr_t begin;
65     };
66     struct BackTrackInfoParenthesesOnce {
67         uintptr_t begin;
68     };
69     struct BackTrackInfoParenthesesTerminal {
70         uintptr_t begin;
71     };
72     struct BackTrackInfoParentheses {
73         uintptr_t matchAmount;
74         ParenthesesDisjunctionContext* lastContext;
75     };
76
77     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
78     {
79         context->next = backTrack->lastContext;
80         backTrack->lastContext = context;
81         ++backTrack->matchAmount;
82     }
83
84     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
85     {
86         ASSERT(backTrack->matchAmount);
87         ASSERT(backTrack->lastContext);
88         backTrack->lastContext = backTrack->lastContext->next;
89         --backTrack->matchAmount;
90     }
91
92     struct DisjunctionContext
93     {
94         DisjunctionContext()
95             : term(0)
96         {
97         }
98
99         void* operator new(size_t, void* where)
100         {
101             return where;
102         }
103
104         int term;
105         unsigned matchBegin;
106         unsigned matchEnd;
107         uintptr_t frame[1];
108     };
109
110     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
111     {
112         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
113         allocatorPool = allocatorPool->ensureCapacity(size);
114         RELEASE_ASSERT(allocatorPool);
115         return new (allocatorPool->alloc(size)) DisjunctionContext();
116     }
117
118     void freeDisjunctionContext(DisjunctionContext* context)
119     {
120         allocatorPool = allocatorPool->dealloc(context);
121     }
122
123     struct ParenthesesDisjunctionContext
124     {
125         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
126             : next(0)
127         {
128             unsigned firstSubpatternId = term.atom.subpatternId;
129             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
130
131             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
132                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
133                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
134             }
135
136             new (getDisjunctionContext(term)) DisjunctionContext();
137         }
138
139         void* operator new(size_t, void* where)
140         {
141             return where;
142         }
143
144         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
145         {
146             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
147                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
148         }
149
150         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
151         {
152             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
153         }
154
155         ParenthesesDisjunctionContext* next;
156         unsigned subpatternBackup[1];
157     };
158
159     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
160     {
161         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
162         allocatorPool = allocatorPool->ensureCapacity(size);
163         RELEASE_ASSERT(allocatorPool);
164         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
165     }
166
167     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
168     {
169         allocatorPool = allocatorPool->dealloc(context);
170     }
171
172     class InputStream {
173     public:
174         InputStream(const CharType* input, unsigned start, unsigned length)
175             : input(input)
176             , pos(start)
177             , length(length)
178         {
179         }
180
181         void next()
182         {
183             ++pos;
184         }
185
186         void rewind(unsigned amount)
187         {
188             ASSERT(pos >= amount);
189             pos -= amount;
190         }
191
192         int read()
193         {
194             ASSERT(pos < length);
195             if (pos < length)
196                 return input[pos];
197             return -1;
198         }
199
200         int readPair()
201         {
202             ASSERT(pos + 1 < length);
203             return input[pos] | input[pos + 1] << 16;
204         }
205
206         int readChecked(unsigned negativePositionOffest)
207         {
208             RELEASE_ASSERT(pos >= negativePositionOffest);
209             unsigned p = pos - negativePositionOffest;
210             ASSERT(p < length);
211             return input[p];
212         }
213
214         int reread(unsigned from)
215         {
216             ASSERT(from < length);
217             return input[from];
218         }
219
220         int prev()
221         {
222             ASSERT(!(pos > length));
223             if (pos && length)
224                 return input[pos - 1];
225             return -1;
226         }
227
228         unsigned getPos()
229         {
230             return pos;
231         }
232
233         void setPos(unsigned p)
234         {
235             pos = p;
236         }
237
238         bool atStart()
239         {
240             return pos == 0;
241         }
242
243         bool atEnd()
244         {
245             return pos == length;
246         }
247
248         unsigned end()
249         {
250             return length;
251         }
252
253         bool checkInput(unsigned count)
254         {
255             if (((pos + count) <= length) && ((pos + count) >= pos)) {
256                 pos += count;
257                 return true;
258             }
259             return false;
260         }
261
262         void uncheckInput(unsigned count)
263         {
264             RELEASE_ASSERT(pos >= count);
265             pos -= count;
266         }
267
268         bool atStart(unsigned negativePositionOffest)
269         {
270             return pos == negativePositionOffest;
271         }
272
273         bool atEnd(unsigned negativePositionOffest)
274         {
275             RELEASE_ASSERT(pos >= negativePositionOffest);
276             return (pos - negativePositionOffest) == length;
277         }
278
279         bool isAvailableInput(unsigned offset)
280         {
281             return (((pos + offset) <= length) && ((pos + offset) >= pos));
282         }
283
284     private:
285         const CharType* input;
286         unsigned pos;
287         unsigned length;
288     };
289
290     bool testCharacterClass(CharacterClass* characterClass, int ch)
291     {
292         if (ch & 0xFF80) {
293             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
294                 if (ch == characterClass->m_matchesUnicode[i])
295                     return true;
296             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
297                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
298                     return true;
299         } else {
300             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
301                 if (ch == characterClass->m_matches[i])
302                     return true;
303             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
304                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
305                     return true;
306         }
307
308         return false;
309     }
310
311     bool checkCharacter(int testChar, unsigned negativeInputOffset)
312     {
313         return testChar == input.readChecked(negativeInputOffset);
314     }
315
316     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
317     {
318         int ch = input.readChecked(negativeInputOffset);
319         return (loChar == ch) || (hiChar == ch);
320     }
321
322     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
323     {
324         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
325         return invert ? !match : match;
326     }
327
328     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
329     {
330         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
331
332         if (!input.checkInput(matchSize))
333             return false;
334
335         if (pattern->m_ignoreCase) {
336             for (unsigned i = 0; i < matchSize; ++i) {
337                 int oldCh = input.reread(matchBegin + i);
338                 int ch = input.readChecked(negativeInputOffset + matchSize - i);
339
340                 if (oldCh == ch)
341                     continue;
342
343                 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
344                 // unicode values are never allowed to match against ascii ones.
345                 if (isASCII(oldCh) || isASCII(ch)) {
346                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
347                         continue;
348                 } else if (areCanonicallyEquivalent(oldCh, ch))
349                     continue;
350
351                 input.uncheckInput(matchSize);
352                 return false;
353             }
354         } else {
355             for (unsigned i = 0; i < matchSize; ++i) {
356                 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
357                     input.uncheckInput(matchSize);
358                     return false;
359                 }
360             }
361         }
362
363         return true;
364     }
365
366     bool matchAssertionBOL(ByteTerm& term)
367     {
368         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
369     }
370
371     bool matchAssertionEOL(ByteTerm& term)
372     {
373         if (term.inputPosition)
374             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
375
376         return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
377     }
378
379     bool matchAssertionWordBoundary(ByteTerm& term)
380     {
381         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
382         bool readIsWordchar;
383         if (term.inputPosition)
384             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
385         else
386             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
387
388         bool wordBoundary = prevIsWordchar != readIsWordchar;
389         return term.invert() ? !wordBoundary : wordBoundary;
390     }
391
392     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
393     {
394         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
395
396         switch (term.atom.quantityType) {
397         case QuantifierFixedCount:
398             break;
399
400         case QuantifierGreedy:
401             if (backTrack->matchAmount) {
402                 --backTrack->matchAmount;
403                 input.uncheckInput(1);
404                 return true;
405             }
406             break;
407
408         case QuantifierNonGreedy:
409             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
410                 ++backTrack->matchAmount;
411                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
412                     return true;
413             }
414             input.uncheckInput(backTrack->matchAmount);
415             break;
416         }
417
418         return false;
419     }
420
421     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
422     {
423         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
424
425         switch (term.atom.quantityType) {
426         case QuantifierFixedCount:
427             break;
428
429         case QuantifierGreedy:
430             if (backTrack->matchAmount) {
431                 --backTrack->matchAmount;
432                 input.uncheckInput(1);
433                 return true;
434             }
435             break;
436
437         case QuantifierNonGreedy:
438             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
439                 ++backTrack->matchAmount;
440                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
441                     return true;
442             }
443             input.uncheckInput(backTrack->matchAmount);
444             break;
445         }
446
447         return false;
448     }
449
450     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
451     {
452         ASSERT(term.type == ByteTerm::TypeCharacterClass);
453         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
454
455         switch (term.atom.quantityType) {
456         case QuantifierFixedCount: {
457             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
458                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
459                     return false;
460             }
461             return true;
462         }
463
464         case QuantifierGreedy: {
465             unsigned matchAmount = 0;
466             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
467                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
468                     input.uncheckInput(1);
469                     break;
470                 }
471                 ++matchAmount;
472             }
473             backTrack->matchAmount = matchAmount;
474
475             return true;
476         }
477
478         case QuantifierNonGreedy:
479             backTrack->matchAmount = 0;
480             return true;
481         }
482
483         RELEASE_ASSERT_NOT_REACHED();
484         return false;
485     }
486
487     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
488     {
489         ASSERT(term.type == ByteTerm::TypeCharacterClass);
490         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
491
492         switch (term.atom.quantityType) {
493         case QuantifierFixedCount:
494             break;
495
496         case QuantifierGreedy:
497             if (backTrack->matchAmount) {
498                 --backTrack->matchAmount;
499                 input.uncheckInput(1);
500                 return true;
501             }
502             break;
503
504         case QuantifierNonGreedy:
505             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
506                 ++backTrack->matchAmount;
507                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
508                     return true;
509             }
510             input.uncheckInput(backTrack->matchAmount);
511             break;
512         }
513
514         return false;
515     }
516
517     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
518     {
519         ASSERT(term.type == ByteTerm::TypeBackReference);
520         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
521
522         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
523         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
524
525         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
526         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
527         // Eg.: /(a\1)/
528         if (matchEnd == offsetNoMatch)
529             return true;
530
531         if (matchBegin == offsetNoMatch)
532             return true;
533
534         ASSERT(matchBegin <= matchEnd);
535
536         if (matchBegin == matchEnd)
537             return true;
538
539         switch (term.atom.quantityType) {
540         case QuantifierFixedCount: {
541             backTrack->begin = input.getPos();
542             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
543                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
544                     input.setPos(backTrack->begin);
545                     return false;
546                 }
547             }
548             return true;
549         }
550
551         case QuantifierGreedy: {
552             unsigned matchAmount = 0;
553             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
554                 ++matchAmount;
555             backTrack->matchAmount = matchAmount;
556             return true;
557         }
558
559         case QuantifierNonGreedy:
560             backTrack->begin = input.getPos();
561             backTrack->matchAmount = 0;
562             return true;
563         }
564
565         RELEASE_ASSERT_NOT_REACHED();
566         return false;
567     }
568
569     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
570     {
571         ASSERT(term.type == ByteTerm::TypeBackReference);
572         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
573
574         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
575         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
576
577         if (matchBegin == offsetNoMatch)
578             return false;
579
580         ASSERT(matchBegin <= matchEnd);
581
582         if (matchBegin == matchEnd)
583             return false;
584
585         switch (term.atom.quantityType) {
586         case QuantifierFixedCount:
587             // for quantityCount == 1, could rewind.
588             input.setPos(backTrack->begin);
589             break;
590
591         case QuantifierGreedy:
592             if (backTrack->matchAmount) {
593                 --backTrack->matchAmount;
594                 input.rewind(matchEnd - matchBegin);
595                 return true;
596             }
597             break;
598
599         case QuantifierNonGreedy:
600             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
601                 ++backTrack->matchAmount;
602                 return true;
603             }
604             input.setPos(backTrack->begin);
605             break;
606         }
607
608         return false;
609     }
610
611     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
612     {
613         if (term.capture()) {
614             unsigned subpatternId = term.atom.subpatternId;
615             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
616             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
617         }
618     }
619     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
620     {
621         unsigned firstSubpatternId = term.atom.subpatternId;
622         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
623         context->restoreOutput(output, firstSubpatternId, count);
624     }
625     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
626     {
627         while (backTrack->matchAmount) {
628             ParenthesesDisjunctionContext* context = backTrack->lastContext;
629
630             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
631             if (result == JSRegExpMatch)
632                 return JSRegExpMatch;
633
634             resetMatches(term, context);
635             popParenthesesDisjunctionContext(backTrack);
636             freeParenthesesDisjunctionContext(context);
637
638             if (result != JSRegExpNoMatch)
639                 return result;
640         }
641
642         return JSRegExpNoMatch;
643     }
644
645     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
646     {
647         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
648         ASSERT(term.atom.quantityCount == 1);
649
650         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
651
652         switch (term.atom.quantityType) {
653         case QuantifierGreedy: {
654             // set this speculatively; if we get to the parens end this will be true.
655             backTrack->begin = input.getPos();
656             break;
657         }
658         case QuantifierNonGreedy: {
659             backTrack->begin = notFound;
660             context->term += term.atom.parenthesesWidth;
661             return true;
662         }
663         case QuantifierFixedCount:
664             break;
665         }
666
667         if (term.capture()) {
668             unsigned subpatternId = term.atom.subpatternId;
669             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
670         }
671
672         return true;
673     }
674
675     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
676     {
677         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
678         ASSERT(term.atom.quantityCount == 1);
679
680         if (term.capture()) {
681             unsigned subpatternId = term.atom.subpatternId;
682             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
683         }
684
685         if (term.atom.quantityType == QuantifierFixedCount)
686             return true;
687
688         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
689         return backTrack->begin != input.getPos();
690     }
691
692     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
693     {
694         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
695         ASSERT(term.atom.quantityCount == 1);
696
697         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
698
699         if (term.capture()) {
700             unsigned subpatternId = term.atom.subpatternId;
701             output[(subpatternId << 1)] = offsetNoMatch;
702             output[(subpatternId << 1) + 1] = offsetNoMatch;
703         }
704
705         switch (term.atom.quantityType) {
706         case QuantifierGreedy:
707             // if we backtrack to this point, there is another chance - try matching nothing.
708             ASSERT(backTrack->begin != notFound);
709             backTrack->begin = notFound;
710             context->term += term.atom.parenthesesWidth;
711             return true;
712         case QuantifierNonGreedy:
713             ASSERT(backTrack->begin != notFound);
714         case QuantifierFixedCount:
715             break;
716         }
717
718         return false;
719     }
720
721     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
722     {
723         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
724         ASSERT(term.atom.quantityCount == 1);
725
726         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
727
728         switch (term.atom.quantityType) {
729         case QuantifierGreedy:
730             if (backTrack->begin == notFound) {
731                 context->term -= term.atom.parenthesesWidth;
732                 return false;
733             }
734         case QuantifierNonGreedy:
735             if (backTrack->begin == notFound) {
736                 backTrack->begin = input.getPos();
737                 if (term.capture()) {
738                     // Technically this access to inputPosition should be accessing the begin term's
739                     // inputPosition, but for repeats other than fixed these values should be
740                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
741                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
742                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
743                     unsigned subpatternId = term.atom.subpatternId;
744                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
745                 }
746                 context->term -= term.atom.parenthesesWidth;
747                 return true;
748             }
749         case QuantifierFixedCount:
750             break;
751         }
752
753         return false;
754     }
755
756     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
757     {
758         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
759         ASSERT(term.atom.quantityType == QuantifierGreedy);
760         ASSERT(term.atom.quantityCount == quantifyInfinite);
761         ASSERT(!term.capture());
762
763         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
764         backTrack->begin = input.getPos();
765         return true;
766     }
767
768     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
769     {
770         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
771
772         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
773         // Empty match is a failed match.
774         if (backTrack->begin == input.getPos())
775             return false;
776
777         // Successful match! Okay, what's next? - loop around and try to match moar!
778         context->term -= (term.atom.parenthesesWidth + 1);
779         return true;
780     }
781
782     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
783     {
784         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
785         ASSERT(term.atom.quantityType == QuantifierGreedy);
786         ASSERT(term.atom.quantityCount == quantifyInfinite);
787         ASSERT(!term.capture());
788
789         // If we backtrack to this point, we have failed to match this iteration of the parens.
790         // Since this is greedy / zero minimum a failed is also accepted as a match!
791         context->term += term.atom.parenthesesWidth;
792         return true;
793     }
794
795     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
796     {
797         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
798         // should always be returned as a successful match - we should never backtrack to here.
799         RELEASE_ASSERT_NOT_REACHED();
800         return false;
801     }
802
803     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
804     {
805         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
806         ASSERT(term.atom.quantityCount == 1);
807
808         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
809
810         backTrack->begin = input.getPos();
811         return true;
812     }
813
814     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
815     {
816         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
817         ASSERT(term.atom.quantityCount == 1);
818
819         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
820
821         input.setPos(backTrack->begin);
822
823         // We've reached the end of the parens; if they are inverted, this is failure.
824         if (term.invert()) {
825             context->term -= term.atom.parenthesesWidth;
826             return false;
827         }
828
829         return true;
830     }
831
832     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
833     {
834         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
835         ASSERT(term.atom.quantityCount == 1);
836
837         // We've failed to match parens; if they are inverted, this is win!
838         if (term.invert()) {
839             context->term += term.atom.parenthesesWidth;
840             return true;
841         }
842
843         return false;
844     }
845
846     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
847     {
848         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
849         ASSERT(term.atom.quantityCount == 1);
850
851         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
852
853         input.setPos(backTrack->begin);
854
855         context->term -= term.atom.parenthesesWidth;
856         return false;
857     }
858
859     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
860     {
861         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
862
863         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
864         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
865
866         backTrack->matchAmount = 0;
867         backTrack->lastContext = 0;
868
869         switch (term.atom.quantityType) {
870         case QuantifierFixedCount: {
871             // While we haven't yet reached our fixed limit,
872             while (backTrack->matchAmount < term.atom.quantityCount) {
873                 // Try to do a match, and it it succeeds, add it to the list.
874                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
875                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
876                 if (result == JSRegExpMatch)
877                     appendParenthesesDisjunctionContext(backTrack, context);
878                 else {
879                     // The match failed; try to find an alternate point to carry on from.
880                     resetMatches(term, context);
881                     freeParenthesesDisjunctionContext(context);
882
883                     if (result != JSRegExpNoMatch)
884                         return result;
885                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
886                     if (backtrackResult != JSRegExpMatch)
887                         return backtrackResult;
888                 }
889             }
890
891             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
892             ParenthesesDisjunctionContext* context = backTrack->lastContext;
893             recordParenthesesMatch(term, context);
894             return JSRegExpMatch;
895         }
896
897         case QuantifierGreedy: {
898             while (backTrack->matchAmount < term.atom.quantityCount) {
899                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
900                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
901                 if (result == JSRegExpMatch)
902                     appendParenthesesDisjunctionContext(backTrack, context);
903                 else {
904                     resetMatches(term, context);
905                     freeParenthesesDisjunctionContext(context);
906
907                     if (result != JSRegExpNoMatch)
908                         return result;
909
910                     break;
911                 }
912             }
913
914             if (backTrack->matchAmount) {
915                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
916                 recordParenthesesMatch(term, context);
917             }
918             return JSRegExpMatch;
919         }
920
921         case QuantifierNonGreedy:
922             return JSRegExpMatch;
923         }
924
925         RELEASE_ASSERT_NOT_REACHED();
926         return JSRegExpErrorNoMatch;
927     }
928
929     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
930     //
931     // Greedy matches never should try just adding more - you should already have done
932     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
933     // you backtrack an item off the list needs checking, since we'll never have matched
934     // the one less case.  Tracking forwards, still add as much as possible.
935     //
936     // Non-greedy, we've already done the one less case, so don't match on popping.
937     // We haven't done the one more case, so always try to add that.
938     //
939     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
940     {
941         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
942
943         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
944         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
945
946         switch (term.atom.quantityType) {
947         case QuantifierFixedCount: {
948             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
949
950             ParenthesesDisjunctionContext* context = 0;
951             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
952
953             if (result != JSRegExpMatch)
954                 return result;
955
956             // While we haven't yet reached our fixed limit,
957             while (backTrack->matchAmount < term.atom.quantityCount) {
958                 // Try to do a match, and it it succeeds, add it to the list.
959                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
960                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
961
962                 if (result == JSRegExpMatch)
963                     appendParenthesesDisjunctionContext(backTrack, context);
964                 else {
965                     // The match failed; try to find an alternate point to carry on from.
966                     resetMatches(term, context);
967                     freeParenthesesDisjunctionContext(context);
968
969                     if (result != JSRegExpNoMatch)
970                         return result;
971                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
972                     if (backtrackResult != JSRegExpMatch)
973                         return backtrackResult;
974                 }
975             }
976
977             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
978             context = backTrack->lastContext;
979             recordParenthesesMatch(term, context);
980             return JSRegExpMatch;
981         }
982
983         case QuantifierGreedy: {
984             if (!backTrack->matchAmount)
985                 return JSRegExpNoMatch;
986
987             ParenthesesDisjunctionContext* context = backTrack->lastContext;
988             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
989             if (result == JSRegExpMatch) {
990                 while (backTrack->matchAmount < term.atom.quantityCount) {
991                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
992                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
993                     if (parenthesesResult == JSRegExpMatch)
994                         appendParenthesesDisjunctionContext(backTrack, context);
995                     else {
996                         resetMatches(term, context);
997                         freeParenthesesDisjunctionContext(context);
998
999                         if (parenthesesResult != JSRegExpNoMatch)
1000                             return parenthesesResult;
1001
1002                         break;
1003                     }
1004                 }
1005             } else {
1006                 resetMatches(term, context);
1007                 popParenthesesDisjunctionContext(backTrack);
1008                 freeParenthesesDisjunctionContext(context);
1009
1010                 if (result != JSRegExpNoMatch)
1011                     return result;
1012             }
1013
1014             if (backTrack->matchAmount) {
1015                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1016                 recordParenthesesMatch(term, context);
1017             }
1018             return JSRegExpMatch;
1019         }
1020
1021         case QuantifierNonGreedy: {
1022             // If we've not reached the limit, try to add one more match.
1023             if (backTrack->matchAmount < term.atom.quantityCount) {
1024                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1025                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1026                 if (result == JSRegExpMatch) {
1027                     appendParenthesesDisjunctionContext(backTrack, context);
1028                     recordParenthesesMatch(term, context);
1029                     return JSRegExpMatch;
1030                 }
1031
1032                 resetMatches(term, context);
1033                 freeParenthesesDisjunctionContext(context);
1034
1035                 if (result != JSRegExpNoMatch)
1036                     return result;
1037             }
1038
1039             // Nope - okay backtrack looking for an alternative.
1040             while (backTrack->matchAmount) {
1041                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1042                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1043                 if (result == JSRegExpMatch) {
1044                     // successful backtrack! we're back in the game!
1045                     if (backTrack->matchAmount) {
1046                         context = backTrack->lastContext;
1047                         recordParenthesesMatch(term, context);
1048                     }
1049                     return JSRegExpMatch;
1050                 }
1051
1052                 // pop a match off the stack
1053                 resetMatches(term, context);
1054                 popParenthesesDisjunctionContext(backTrack);
1055                 freeParenthesesDisjunctionContext(context);
1056
1057                 if (result != JSRegExpNoMatch)
1058                     return result;
1059             }
1060
1061             return JSRegExpNoMatch;
1062         }
1063         }
1064
1065         RELEASE_ASSERT_NOT_REACHED();
1066         return JSRegExpErrorNoMatch;
1067     }
1068
1069     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1070     {
1071         UNUSED_PARAM(term);
1072         unsigned matchBegin = context->matchBegin;
1073
1074         if (matchBegin) {
1075             for (matchBegin--; true; matchBegin--) {
1076                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1077                     ++matchBegin;
1078                     break;
1079                 }
1080
1081                 if (!matchBegin)
1082                     break;
1083             }
1084         }
1085
1086         unsigned matchEnd = input.getPos();
1087
1088         for (; (matchEnd != input.end())
1089              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1090
1091         if (((matchBegin && term.anchors.m_bol)
1092              || ((matchEnd != input.end()) && term.anchors.m_eol))
1093             && !pattern->m_multiline)
1094             return false;
1095
1096         context->matchBegin = matchBegin;
1097         context->matchEnd = matchEnd;
1098         return true;
1099     }
1100
1101 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1102 #define BACKTRACK() { --context->term; goto backtrack; }
1103 #define currentTerm() (disjunction->terms[context->term])
1104     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1105     {
1106         if (!--remainingMatchCount)
1107             return JSRegExpErrorHitLimit;
1108
1109         if (btrack)
1110             BACKTRACK();
1111
1112         context->matchBegin = input.getPos();
1113         context->term = 0;
1114
1115     matchAgain:
1116         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1117
1118         switch (currentTerm().type) {
1119         case ByteTerm::TypeSubpatternBegin:
1120             MATCH_NEXT();
1121         case ByteTerm::TypeSubpatternEnd:
1122             context->matchEnd = input.getPos();
1123             return JSRegExpMatch;
1124
1125         case ByteTerm::TypeBodyAlternativeBegin:
1126             MATCH_NEXT();
1127         case ByteTerm::TypeBodyAlternativeDisjunction:
1128         case ByteTerm::TypeBodyAlternativeEnd:
1129             context->matchEnd = input.getPos();
1130             return JSRegExpMatch;
1131
1132         case ByteTerm::TypeAlternativeBegin:
1133             MATCH_NEXT();
1134         case ByteTerm::TypeAlternativeDisjunction:
1135         case ByteTerm::TypeAlternativeEnd: {
1136             int offset = currentTerm().alternative.end;
1137             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1138             backTrack->offset = offset;
1139             context->term += offset;
1140             MATCH_NEXT();
1141         }
1142
1143         case ByteTerm::TypeAssertionBOL:
1144             if (matchAssertionBOL(currentTerm()))
1145                 MATCH_NEXT();
1146             BACKTRACK();
1147         case ByteTerm::TypeAssertionEOL:
1148             if (matchAssertionEOL(currentTerm()))
1149                 MATCH_NEXT();
1150             BACKTRACK();
1151         case ByteTerm::TypeAssertionWordBoundary:
1152             if (matchAssertionWordBoundary(currentTerm()))
1153                 MATCH_NEXT();
1154             BACKTRACK();
1155
1156         case ByteTerm::TypePatternCharacterOnce:
1157         case ByteTerm::TypePatternCharacterFixed: {
1158             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1159                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
1160                     BACKTRACK();
1161             }
1162             MATCH_NEXT();
1163         }
1164         case ByteTerm::TypePatternCharacterGreedy: {
1165             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1166             unsigned matchAmount = 0;
1167             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1168                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1169                     input.uncheckInput(1);
1170                     break;
1171                 }
1172                 ++matchAmount;
1173             }
1174             backTrack->matchAmount = matchAmount;
1175
1176             MATCH_NEXT();
1177         }
1178         case ByteTerm::TypePatternCharacterNonGreedy: {
1179             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1180             backTrack->matchAmount = 0;
1181             MATCH_NEXT();
1182         }
1183
1184         case ByteTerm::TypePatternCasedCharacterOnce:
1185         case ByteTerm::TypePatternCasedCharacterFixed: {
1186             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1187                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1188                     BACKTRACK();
1189             }
1190             MATCH_NEXT();
1191         }
1192         case ByteTerm::TypePatternCasedCharacterGreedy: {
1193             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1194             unsigned matchAmount = 0;
1195             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1196                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1197                     input.uncheckInput(1);
1198                     break;
1199                 }
1200                 ++matchAmount;
1201             }
1202             backTrack->matchAmount = matchAmount;
1203
1204             MATCH_NEXT();
1205         }
1206         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1207             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1208             backTrack->matchAmount = 0;
1209             MATCH_NEXT();
1210         }
1211
1212         case ByteTerm::TypeCharacterClass:
1213             if (matchCharacterClass(currentTerm(), context))
1214                 MATCH_NEXT();
1215             BACKTRACK();
1216         case ByteTerm::TypeBackReference:
1217             if (matchBackReference(currentTerm(), context))
1218                 MATCH_NEXT();
1219             BACKTRACK();
1220         case ByteTerm::TypeParenthesesSubpattern: {
1221             JSRegExpResult result = matchParentheses(currentTerm(), context);
1222
1223             if (result == JSRegExpMatch) {
1224                 MATCH_NEXT();
1225             }  else if (result != JSRegExpNoMatch)
1226                 return result;
1227
1228             BACKTRACK();
1229         }
1230         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1231             if (matchParenthesesOnceBegin(currentTerm(), context))
1232                 MATCH_NEXT();
1233             BACKTRACK();
1234         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1235             if (matchParenthesesOnceEnd(currentTerm(), context))
1236                 MATCH_NEXT();
1237             BACKTRACK();
1238         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1239             if (matchParenthesesTerminalBegin(currentTerm(), context))
1240                 MATCH_NEXT();
1241             BACKTRACK();
1242         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1243             if (matchParenthesesTerminalEnd(currentTerm(), context))
1244                 MATCH_NEXT();
1245             BACKTRACK();
1246         case ByteTerm::TypeParentheticalAssertionBegin:
1247             if (matchParentheticalAssertionBegin(currentTerm(), context))
1248                 MATCH_NEXT();
1249             BACKTRACK();
1250         case ByteTerm::TypeParentheticalAssertionEnd:
1251             if (matchParentheticalAssertionEnd(currentTerm(), context))
1252                 MATCH_NEXT();
1253             BACKTRACK();
1254
1255         case ByteTerm::TypeCheckInput:
1256             if (input.checkInput(currentTerm().checkInputCount))
1257                 MATCH_NEXT();
1258             BACKTRACK();
1259
1260         case ByteTerm::TypeUncheckInput:
1261             input.uncheckInput(currentTerm().checkInputCount);
1262             MATCH_NEXT();
1263                 
1264         case ByteTerm::TypeDotStarEnclosure:
1265             if (matchDotStarEnclosure(currentTerm(), context))
1266                 return JSRegExpMatch;
1267             BACKTRACK();
1268         }
1269
1270         // We should never fall-through to here.
1271         RELEASE_ASSERT_NOT_REACHED();
1272
1273     backtrack:
1274         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1275
1276         switch (currentTerm().type) {
1277         case ByteTerm::TypeSubpatternBegin:
1278             return JSRegExpNoMatch;
1279         case ByteTerm::TypeSubpatternEnd:
1280             RELEASE_ASSERT_NOT_REACHED();
1281
1282         case ByteTerm::TypeBodyAlternativeBegin:
1283         case ByteTerm::TypeBodyAlternativeDisjunction: {
1284             int offset = currentTerm().alternative.next;
1285             context->term += offset;
1286             if (offset > 0)
1287                 MATCH_NEXT();
1288
1289             if (input.atEnd())
1290                 return JSRegExpNoMatch;
1291
1292             input.next();
1293
1294             context->matchBegin = input.getPos();
1295
1296             if (currentTerm().alternative.onceThrough)
1297                 context->term += currentTerm().alternative.next;
1298
1299             MATCH_NEXT();
1300         }
1301         case ByteTerm::TypeBodyAlternativeEnd:
1302             RELEASE_ASSERT_NOT_REACHED();
1303
1304         case ByteTerm::TypeAlternativeBegin:
1305         case ByteTerm::TypeAlternativeDisjunction: {
1306             int offset = currentTerm().alternative.next;
1307             context->term += offset;
1308             if (offset > 0)
1309                 MATCH_NEXT();
1310             BACKTRACK();
1311         }
1312         case ByteTerm::TypeAlternativeEnd: {
1313             // We should never backtrack back into an alternative of the main body of the regex.
1314             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1315             unsigned offset = backTrack->offset;
1316             context->term -= offset;
1317             BACKTRACK();
1318         }
1319
1320         case ByteTerm::TypeAssertionBOL:
1321         case ByteTerm::TypeAssertionEOL:
1322         case ByteTerm::TypeAssertionWordBoundary:
1323             BACKTRACK();
1324
1325         case ByteTerm::TypePatternCharacterOnce:
1326         case ByteTerm::TypePatternCharacterFixed:
1327         case ByteTerm::TypePatternCharacterGreedy:
1328         case ByteTerm::TypePatternCharacterNonGreedy:
1329             if (backtrackPatternCharacter(currentTerm(), context))
1330                 MATCH_NEXT();
1331             BACKTRACK();
1332         case ByteTerm::TypePatternCasedCharacterOnce:
1333         case ByteTerm::TypePatternCasedCharacterFixed:
1334         case ByteTerm::TypePatternCasedCharacterGreedy:
1335         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1336             if (backtrackPatternCasedCharacter(currentTerm(), context))
1337                 MATCH_NEXT();
1338             BACKTRACK();
1339         case ByteTerm::TypeCharacterClass:
1340             if (backtrackCharacterClass(currentTerm(), context))
1341                 MATCH_NEXT();
1342             BACKTRACK();
1343         case ByteTerm::TypeBackReference:
1344             if (backtrackBackReference(currentTerm(), context))
1345                 MATCH_NEXT();
1346             BACKTRACK();
1347         case ByteTerm::TypeParenthesesSubpattern: {
1348             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1349
1350             if (result == JSRegExpMatch) {
1351                 MATCH_NEXT();
1352             } else if (result != JSRegExpNoMatch)
1353                 return result;
1354
1355             BACKTRACK();
1356         }
1357         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1358             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1359                 MATCH_NEXT();
1360             BACKTRACK();
1361         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1362             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1363                 MATCH_NEXT();
1364             BACKTRACK();
1365         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1366             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1367                 MATCH_NEXT();
1368             BACKTRACK();
1369         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1370             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1371                 MATCH_NEXT();
1372             BACKTRACK();
1373         case ByteTerm::TypeParentheticalAssertionBegin:
1374             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1375                 MATCH_NEXT();
1376             BACKTRACK();
1377         case ByteTerm::TypeParentheticalAssertionEnd:
1378             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1379                 MATCH_NEXT();
1380             BACKTRACK();
1381
1382         case ByteTerm::TypeCheckInput:
1383             input.uncheckInput(currentTerm().checkInputCount);
1384             BACKTRACK();
1385
1386         case ByteTerm::TypeUncheckInput:
1387             input.checkInput(currentTerm().checkInputCount);
1388             BACKTRACK();
1389
1390         case ByteTerm::TypeDotStarEnclosure:
1391             RELEASE_ASSERT_NOT_REACHED();
1392         }
1393
1394         RELEASE_ASSERT_NOT_REACHED();
1395         return JSRegExpErrorNoMatch;
1396     }
1397
1398     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1399     {
1400         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1401
1402         if (result == JSRegExpMatch) {
1403             while (context->matchBegin == context->matchEnd) {
1404                 result = matchDisjunction(disjunction, context, true);
1405                 if (result != JSRegExpMatch)
1406                     return result;
1407             }
1408             return JSRegExpMatch;
1409         }
1410
1411         return result;
1412     }
1413
1414     unsigned interpret()
1415     {
1416         if (!input.isAvailableInput(0))
1417             return offsetNoMatch;
1418
1419         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1420             output[i << 1] = offsetNoMatch;
1421
1422         allocatorPool = pattern->m_allocator->startAllocator();
1423         RELEASE_ASSERT(allocatorPool);
1424
1425         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1426
1427         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1428         if (result == JSRegExpMatch) {
1429             output[0] = context->matchBegin;
1430             output[1] = context->matchEnd;
1431         }
1432
1433         freeDisjunctionContext(context);
1434
1435         pattern->m_allocator->stopAllocator();
1436
1437         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1438         return output[0];
1439     }
1440
1441     Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1442         : pattern(pattern)
1443         , output(output)
1444         , input(input, start, length)
1445         , allocatorPool(0)
1446         , remainingMatchCount(matchLimit)
1447     {
1448     }
1449
1450 private:
1451     BytecodePattern* pattern;
1452     unsigned* output;
1453     InputStream input;
1454     BumpPointerPool* allocatorPool;
1455     unsigned remainingMatchCount;
1456 };
1457
1458
1459
1460 class ByteCompiler {
1461     struct ParenthesesStackEntry {
1462         unsigned beginTerm;
1463         unsigned savedAlternativeIndex;
1464         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1465             : beginTerm(beginTerm)
1466             , savedAlternativeIndex(savedAlternativeIndex)
1467         {
1468         }
1469     };
1470
1471 public:
1472     ByteCompiler(YarrPattern& pattern)
1473         : m_pattern(pattern)
1474     {
1475         m_currentAlternativeIndex = 0;
1476     }
1477
1478     PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1479     {
1480         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1481         emitDisjunction(m_pattern.m_body);
1482         regexEnd();
1483
1484         return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
1485     }
1486
1487     void checkInput(unsigned count)
1488     {
1489         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1490     }
1491
1492     void uncheckInput(unsigned count)
1493     {
1494         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1495     }
1496     
1497     void assertionBOL(unsigned inputPosition)
1498     {
1499         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1500     }
1501
1502     void assertionEOL(unsigned inputPosition)
1503     {
1504         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1505     }
1506
1507     void assertionWordBoundary(bool invert, unsigned inputPosition)
1508     {
1509         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1510     }
1511
1512     void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1513     {
1514         if (m_pattern.m_ignoreCase) {
1515             UChar lo = Unicode::toLower(ch);
1516             UChar hi = Unicode::toUpper(ch);
1517
1518             if (lo != hi) {
1519                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1520                 return;
1521             }
1522         }
1523
1524         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1525     }
1526
1527     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1528     {
1529         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1530
1531         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1532         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1533         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1534     }
1535
1536     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1537     {
1538         ASSERT(subpatternId);
1539
1540         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1541
1542         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1543         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1544         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1545     }
1546
1547     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1548     {
1549         int beginTerm = m_bodyDisjunction->terms.size();
1550
1551         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1552         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1553         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1554         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1555
1556         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1557         m_currentAlternativeIndex = beginTerm + 1;
1558     }
1559
1560     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1561     {
1562         int beginTerm = m_bodyDisjunction->terms.size();
1563
1564         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1565         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1566         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1567         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1568
1569         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1570         m_currentAlternativeIndex = beginTerm + 1;
1571     }
1572
1573     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1574     {
1575         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1576         // then fix this up at the end! - simplifying this should make it much clearer.
1577         // https://bugs.webkit.org/show_bug.cgi?id=50136
1578
1579         int beginTerm = m_bodyDisjunction->terms.size();
1580
1581         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1582         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1583         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1584         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1585
1586         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1587         m_currentAlternativeIndex = beginTerm + 1;
1588     }
1589
1590     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1591     {
1592         int beginTerm = m_bodyDisjunction->terms.size();
1593
1594         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1595         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1596         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1597         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1598
1599         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1600         m_currentAlternativeIndex = beginTerm + 1;
1601     }
1602
1603     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1604     {
1605         unsigned beginTerm = popParenthesesStack();
1606         closeAlternative(beginTerm + 1);
1607         unsigned endTerm = m_bodyDisjunction->terms.size();
1608
1609         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1610
1611         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1612         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1613
1614         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1615         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1616         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1617         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1618
1619         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1620         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1621         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1622         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1623     }
1624
1625     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1626     {
1627         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1628     }
1629
1630     unsigned popParenthesesStack()
1631     {
1632         ASSERT(m_parenthesesStack.size());
1633         int stackEnd = m_parenthesesStack.size() - 1;
1634         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1635         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1636         m_parenthesesStack.shrink(stackEnd);
1637
1638         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1639         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1640
1641         return beginTerm;
1642     }
1643
1644 #ifndef NDEBUG
1645     void dumpDisjunction(ByteDisjunction* disjunction)
1646     {
1647         dataLogF("ByteDisjunction(%p):\n\t", disjunction);
1648         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1649             dataLogF("{ %d } ", disjunction->terms[i].type);
1650         dataLogF("\n");
1651     }
1652 #endif
1653
1654     void closeAlternative(int beginTerm)
1655     {
1656         int origBeginTerm = beginTerm;
1657         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1658         int endIndex = m_bodyDisjunction->terms.size();
1659
1660         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1661
1662         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1663             m_bodyDisjunction->terms.remove(beginTerm);
1664         else {
1665             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1666                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1667                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1668                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1669                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1670             }
1671
1672             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1673
1674             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1675             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1676         }
1677     }
1678
1679     void closeBodyAlternative()
1680     {
1681         int beginTerm = 0;
1682         int origBeginTerm = 0;
1683         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1684         int endIndex = m_bodyDisjunction->terms.size();
1685
1686         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1687
1688         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1689             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1690             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1691             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1692             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1693         }
1694
1695         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1696
1697         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1698         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1699     }
1700
1701     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1702     {
1703         unsigned beginTerm = popParenthesesStack();
1704         closeAlternative(beginTerm + 1);
1705         unsigned endTerm = m_bodyDisjunction->terms.size();
1706
1707         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1708
1709         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1710
1711         bool capture = parenthesesBegin.capture();
1712         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1713
1714         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1715         ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
1716
1717         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1718         for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1719             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1720         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1721
1722         m_bodyDisjunction->terms.shrink(beginTerm);
1723
1724         m_allParenthesesInfo.append(parenthesesDisjunction);
1725         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
1726
1727         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1728         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1729         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1730     }
1731
1732     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1733     {
1734         unsigned beginTerm = popParenthesesStack();
1735         closeAlternative(beginTerm + 1);
1736         unsigned endTerm = m_bodyDisjunction->terms.size();
1737
1738         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1739
1740         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1741         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1742
1743         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1744         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1745         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1746         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1747
1748         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1749         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1750         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1751         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1752     }
1753
1754     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1755     {
1756         unsigned beginTerm = popParenthesesStack();
1757         closeAlternative(beginTerm + 1);
1758         unsigned endTerm = m_bodyDisjunction->terms.size();
1759
1760         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1761
1762         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1763         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1764
1765         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1766         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1767         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1768         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1769
1770         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1771         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1772         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1773         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1774     }
1775
1776     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1777     {
1778         m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1779         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1780         m_bodyDisjunction->terms[0].frameLocation = 0;
1781         m_currentAlternativeIndex = 0;
1782     }
1783
1784     void regexEnd()
1785     {
1786         closeBodyAlternative();
1787     }
1788
1789     void alternativeBodyDisjunction(bool onceThrough)
1790     {
1791         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1792         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1793         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1794
1795         m_currentAlternativeIndex = newAlternativeIndex;
1796     }
1797
1798     void alternativeDisjunction()
1799     {
1800         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1801         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1802         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1803
1804         m_currentAlternativeIndex = newAlternativeIndex;
1805     }
1806
1807     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1808     {
1809         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1810             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1811
1812             PatternAlternative* alternative = disjunction->m_alternatives[alt];
1813
1814             if (alt) {
1815                 if (disjunction == m_pattern.m_body)
1816                     alternativeBodyDisjunction(alternative->onceThrough());
1817                 else
1818                     alternativeDisjunction();
1819             }
1820
1821             unsigned minimumSize = alternative->m_minimumSize;
1822             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1823             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1824
1825             if (countToCheck) {
1826                 checkInput(countToCheck);
1827                 currentCountAlreadyChecked += countToCheck;
1828             }
1829
1830             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1831                 PatternTerm& term = alternative->m_terms[i];
1832
1833                 switch (term.type) {
1834                 case PatternTerm::TypeAssertionBOL:
1835                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
1836                     break;
1837
1838                 case PatternTerm::TypeAssertionEOL:
1839                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
1840                     break;
1841
1842                 case PatternTerm::TypeAssertionWordBoundary:
1843                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
1844                     break;
1845
1846                 case PatternTerm::TypePatternCharacter:
1847                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1848                     break;
1849
1850                 case PatternTerm::TypeCharacterClass:
1851                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1852                     break;
1853
1854                 case PatternTerm::TypeBackReference:
1855                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1856                         break;
1857
1858                 case PatternTerm::TypeForwardReference:
1859                     break;
1860
1861                 case PatternTerm::TypeParenthesesSubpattern: {
1862                     unsigned disjunctionAlreadyCheckedCount = 0;
1863                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1864                         unsigned alternativeFrameLocation = term.frameLocation;
1865                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1866                         if (term.quantityType == QuantifierFixedCount)
1867                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1868                         else
1869                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1870                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1871                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
1872                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1873                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1874                     } else if (term.parentheses.isTerminal) {
1875                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1876                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1877                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1878                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1879                     } else {
1880                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1881                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
1882                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1883                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1884                     }
1885                     break;
1886                 }
1887
1888                 case PatternTerm::TypeParentheticalAssertion: {
1889                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1890
1891                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1892                     unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1893                     unsigned uncheckAmount = 0;
1894                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1895                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1896                         uncheckInput(uncheckAmount);
1897                         currentCountAlreadyChecked -= uncheckAmount;
1898                     }
1899
1900                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1901                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1902                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1903                     if (uncheckAmount) {
1904                         checkInput(uncheckAmount);
1905                         currentCountAlreadyChecked += uncheckAmount;
1906                     }
1907                     break;
1908                 }
1909
1910                 case PatternTerm::TypeDotStarEnclosure:
1911                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1912                     break;
1913                 }
1914             }
1915         }
1916     }
1917
1918 private:
1919     YarrPattern& m_pattern;
1920     OwnPtr<ByteDisjunction> m_bodyDisjunction;
1921     unsigned m_currentAlternativeIndex;
1922     Vector<ParenthesesStackEntry> m_parenthesesStack;
1923     Vector<ByteDisjunction*> m_allParenthesesInfo;
1924 };
1925
1926 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1927 {
1928     return ByteCompiler(pattern).compile(allocator);
1929 }
1930
1931 unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
1932 {
1933     if (input.is8Bit())
1934         return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
1935     return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
1936 }
1937
1938 unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
1939 {
1940     return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
1941 }
1942
1943 unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
1944 {
1945     return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
1946 }
1947
1948 // These should be the same for both UChar & LChar.
1949 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1950 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1951 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1952 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1953 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1954 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1955 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1956
1957
1958 } }