REGRESSION(r222601): We fail to properly backtrack into a sub pattern of a parenthesi...
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrInterpreter.cpp
1 /*
2  * Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "YarrInterpreter.h"
29
30 #include "SuperSampler.h"
31 #include "Yarr.h"
32 #include "YarrCanonicalize.h"
33 #include <wtf/BumpPointerAllocator.h>
34 #include <wtf/DataLog.h>
35 #include <wtf/text/CString.h>
36 #include <wtf/text/WTFString.h>
37
38 using namespace WTF;
39
40 namespace JSC { namespace Yarr {
41
42 template<typename CharType>
43 class Interpreter {
44 public:
45     struct ParenthesesDisjunctionContext;
46
47     struct BackTrackInfoParentheses {
48         uintptr_t matchAmount;
49         ParenthesesDisjunctionContext* lastContext;
50     };
51
52     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
53     {
54         context->next = backTrack->lastContext;
55         backTrack->lastContext = context;
56         ++backTrack->matchAmount;
57     }
58
59     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
60     {
61         RELEASE_ASSERT(backTrack->matchAmount);
62         RELEASE_ASSERT(backTrack->lastContext);
63         backTrack->lastContext = backTrack->lastContext->next;
64         --backTrack->matchAmount;
65     }
66
67     struct DisjunctionContext
68     {
69         DisjunctionContext()
70             : term(0)
71         {
72         }
73
74         void* operator new(size_t, void* where)
75         {
76             return where;
77         }
78
79         int term;
80         unsigned matchBegin;
81         unsigned matchEnd;
82         uintptr_t frame[1];
83     };
84
85     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
86     {
87         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
88         allocatorPool = allocatorPool->ensureCapacity(size);
89         RELEASE_ASSERT(allocatorPool);
90         return new (allocatorPool->alloc(size)) DisjunctionContext();
91     }
92
93     void freeDisjunctionContext(DisjunctionContext* context)
94     {
95         allocatorPool = allocatorPool->dealloc(context);
96     }
97
98     struct ParenthesesDisjunctionContext
99     {
100         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
101             : next(0)
102         {
103             unsigned firstSubpatternId = term.atom.subpatternId;
104             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
105
106             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
107                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
108                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
109             }
110
111             new (getDisjunctionContext(term)) DisjunctionContext();
112         }
113
114         void* operator new(size_t, void* where)
115         {
116             return where;
117         }
118
119         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
120         {
121             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
122                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
123         }
124
125         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
126         {
127             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
128         }
129
130         ParenthesesDisjunctionContext* next;
131         unsigned subpatternBackup[1];
132     };
133
134     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
135     {
136         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t);
137         allocatorPool = allocatorPool->ensureCapacity(size);
138         RELEASE_ASSERT(allocatorPool);
139         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
140     }
141
142     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
143     {
144         allocatorPool = allocatorPool->dealloc(context);
145     }
146
147     class InputStream {
148     public:
149         InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
150             : input(input)
151             , pos(start)
152             , length(length)
153             , decodeSurrogatePairs(decodeSurrogatePairs)
154         {
155         }
156
157         void next()
158         {
159             ++pos;
160         }
161
162         void rewind(unsigned amount)
163         {
164             ASSERT(pos >= amount);
165             pos -= amount;
166         }
167
168         int read()
169         {
170             ASSERT(pos < length);
171             if (pos < length)
172                 return input[pos];
173             return -1;
174         }
175
176         int readPair()
177         {
178             ASSERT(pos + 1 < length);
179             return input[pos] | input[pos + 1] << 16;
180         }
181
182         int readChecked(unsigned negativePositionOffest)
183         {
184             RELEASE_ASSERT(pos >= negativePositionOffest);
185             unsigned p = pos - negativePositionOffest;
186             ASSERT(p < length);
187             int result = input[p];
188             if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
189                 if (atEnd())
190                     return -1;
191                 
192                 result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
193                 next();
194             }
195             return result;
196         }
197         
198         int readSurrogatePairChecked(unsigned negativePositionOffset)
199         {
200             RELEASE_ASSERT(pos >= negativePositionOffset);
201             unsigned p = pos - negativePositionOffset;
202             ASSERT(p < length);
203             if (p + 1 >= length)
204                 return -1;
205
206             int first = input[p];
207             int second = input[p + 1];
208             if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
209                 return U16_GET_SUPPLEMENTARY(first, second);
210
211             return -1;
212         }
213
214         int reread(unsigned from)
215         {
216             ASSERT(from < length);
217             int result = input[from];
218             if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1]))
219                 result = U16_GET_SUPPLEMENTARY(result, input[from + 1]);
220             return result;
221         }
222
223         int prev()
224         {
225             ASSERT(!(pos > length));
226             if (pos && length)
227                 return input[pos - 1];
228             return -1;
229         }
230
231         unsigned getPos()
232         {
233             return pos;
234         }
235
236         void setPos(unsigned p)
237         {
238             pos = p;
239         }
240
241         bool atStart()
242         {
243             return pos == 0;
244         }
245
246         bool atEnd()
247         {
248             return pos == length;
249         }
250
251         unsigned end()
252         {
253             return length;
254         }
255
256         bool checkInput(unsigned count)
257         {
258             if (((pos + count) <= length) && ((pos + count) >= pos)) {
259                 pos += count;
260                 return true;
261             }
262             return false;
263         }
264
265         void uncheckInput(unsigned count)
266         {
267             RELEASE_ASSERT(pos >= count);
268             pos -= count;
269         }
270
271         bool atStart(unsigned negativePositionOffset)
272         {
273             return pos == negativePositionOffset;
274         }
275
276         bool atEnd(unsigned negativePositionOffest)
277         {
278             RELEASE_ASSERT(pos >= negativePositionOffest);
279             return (pos - negativePositionOffest) == length;
280         }
281
282         bool isAvailableInput(unsigned offset)
283         {
284             return (((pos + offset) <= length) && ((pos + offset) >= pos));
285         }
286
287     private:
288         const CharType* input;
289         unsigned pos;
290         unsigned length;
291         bool decodeSurrogatePairs;
292     };
293
294     bool testCharacterClass(CharacterClass* characterClass, int ch)
295     {
296         auto linearSearchMatches = [&ch](const Vector<UChar32>& matches) {
297             for (unsigned i = 0; i < matches.size(); ++i) {
298                 if (ch == matches[i])
299                     return true;
300             }
301
302             return false;
303         };
304
305         auto binarySearchMatches = [&ch](const Vector<UChar32>& matches) {
306             size_t low = 0;
307             size_t high = matches.size() - 1;
308
309             while (low <= high) {
310                 size_t mid = low + (high - low) / 2;
311                 int diff = ch - matches[mid];
312                 if (!diff)
313                     return true;
314
315                 if (diff < 0) {
316                     if (mid == low)
317                         return false;
318                     high = mid - 1;
319                 } else
320                     low = mid + 1;
321             }
322             return false;
323         };
324
325         auto linearSearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
326             for (unsigned i = 0; i < ranges.size(); ++i) {
327                 if ((ch >= ranges[i].begin) && (ch <= ranges[i].end))
328                     return true;
329             }
330
331             return false;
332         };
333
334         auto binarySearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
335             size_t low = 0;
336             size_t high = ranges.size() - 1;
337
338             while (low <= high) {
339                 size_t mid = low + (high - low) / 2;
340                 int rangeBeginDiff = ch - ranges[mid].begin;
341                 if (rangeBeginDiff >= 0 && ch <= ranges[mid].end)
342                     return true;
343
344                 if (rangeBeginDiff < 0) {
345                     if (mid == low)
346                         return false;
347                     high = mid - 1;
348                 } else
349                     low = mid + 1;
350             }
351             return false;
352         };
353
354         const size_t thresholdForBinarySearch = 6;
355
356         if (!isASCII(ch)) {
357             if (characterClass->m_matchesUnicode.size()) {
358                 if (characterClass->m_matchesUnicode.size() > thresholdForBinarySearch) {
359                     if (binarySearchMatches(characterClass->m_matchesUnicode))
360                         return true;
361                 } else if (linearSearchMatches(characterClass->m_matchesUnicode))
362                     return true;
363             }
364
365             if (characterClass->m_rangesUnicode.size()) {
366                 if (characterClass->m_rangesUnicode.size() > thresholdForBinarySearch) {
367                     if (binarySearchRanges(characterClass->m_rangesUnicode))
368                         return true;
369                 } else if (linearSearchRanges(characterClass->m_rangesUnicode))
370                     return true;
371             }
372         } else {
373             if (characterClass->m_matches.size()) {
374                 if (characterClass->m_matches.size() > thresholdForBinarySearch) {
375                     if (binarySearchMatches(characterClass->m_matches))
376                         return true;
377                 } else if (linearSearchMatches(characterClass->m_matches))
378                     return true;
379             }
380
381             if (characterClass->m_ranges.size()) {
382                 if (characterClass->m_ranges.size() > thresholdForBinarySearch) {
383                     if (binarySearchRanges(characterClass->m_ranges))
384                         return true;
385                 } else if (linearSearchRanges(characterClass->m_ranges))
386                     return true;
387             }
388         }
389
390         return false;
391     }
392
393     bool checkCharacter(int testChar, unsigned negativeInputOffset)
394     {
395         return testChar == input.readChecked(negativeInputOffset);
396     }
397
398     bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
399     {
400         return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
401     }
402
403     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
404     {
405         int ch = input.readChecked(negativeInputOffset);
406         return (loChar == ch) || (hiChar == ch);
407     }
408
409     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
410     {
411         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
412         return invert ? !match : match;
413     }
414
415     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
416     {
417         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
418
419         if (!input.checkInput(matchSize))
420             return false;
421
422         for (unsigned i = 0; i < matchSize; ++i) {
423             int oldCh = input.reread(matchBegin + i);
424             int ch;
425             if (!U_IS_BMP(oldCh)) {
426                 ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
427                 ++i;
428             } else
429                 ch = input.readChecked(negativeInputOffset + matchSize - i);
430
431             if (oldCh == ch)
432                 continue;
433
434             if (pattern->ignoreCase()) {
435                 // See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
436                 // patterns, Unicode values are never allowed to match against ASCII ones.
437                 // For Unicode, we need to check all canonical equivalents of a character.
438                 if (!unicode && (isASCII(oldCh) || isASCII(ch))) {
439                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
440                         continue;
441                 } else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
442                     continue;
443             }
444
445             input.uncheckInput(matchSize);
446             return false;
447         }
448
449         return true;
450     }
451
452     bool matchAssertionBOL(ByteTerm& term)
453     {
454         return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
455     }
456
457     bool matchAssertionEOL(ByteTerm& term)
458     {
459         if (term.inputPosition)
460             return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
461
462         return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
463     }
464
465     bool matchAssertionWordBoundary(ByteTerm& term)
466     {
467         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
468         bool readIsWordchar;
469         if (term.inputPosition)
470             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
471         else
472             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
473
474         bool wordBoundary = prevIsWordchar != readIsWordchar;
475         return term.invert() ? !wordBoundary : wordBoundary;
476     }
477
478     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
479     {
480         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
481
482         switch (term.atom.quantityType) {
483         case QuantifierFixedCount:
484             break;
485
486         case QuantifierGreedy:
487             if (backTrack->matchAmount) {
488                 --backTrack->matchAmount;
489                 input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
490                 return true;
491             }
492             break;
493
494         case QuantifierNonGreedy:
495             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
496                 ++backTrack->matchAmount;
497                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
498                     return true;
499             }
500             input.setPos(backTrack->begin);
501             break;
502         }
503
504         return false;
505     }
506
507     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
508     {
509         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
510
511         switch (term.atom.quantityType) {
512         case QuantifierFixedCount:
513             break;
514
515         case QuantifierGreedy:
516             if (backTrack->matchAmount) {
517                 --backTrack->matchAmount;
518                 input.uncheckInput(1);
519                 return true;
520             }
521             break;
522
523         case QuantifierNonGreedy:
524             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
525                 ++backTrack->matchAmount;
526                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
527                     return true;
528             }
529             input.uncheckInput(backTrack->matchAmount);
530             break;
531         }
532
533         return false;
534     }
535
536     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
537     {
538         ASSERT(term.type == ByteTerm::TypeCharacterClass);
539         BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
540
541         switch (term.atom.quantityType) {
542         case QuantifierFixedCount: {
543             if (unicode) {
544                 backTrack->begin = input.getPos();
545                 unsigned matchAmount = 0;
546                 for (matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
547                     if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) {
548                         input.setPos(backTrack->begin);
549                         return false;
550                     }
551                 }
552
553                 return true;
554             }
555
556             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
557                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
558                     return false;
559             }
560             return true;
561         }
562
563         case QuantifierGreedy: {
564             unsigned position = input.getPos();
565             backTrack->begin = position;
566             unsigned matchAmount = 0;
567             while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
568                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
569                     input.setPos(position);
570                     break;
571                 }
572                 ++matchAmount;
573                 position = input.getPos();
574             }
575             backTrack->matchAmount = matchAmount;
576
577             return true;
578         }
579
580         case QuantifierNonGreedy:
581             backTrack->begin = input.getPos();
582             backTrack->matchAmount = 0;
583             return true;
584         }
585
586         RELEASE_ASSERT_NOT_REACHED();
587         return false;
588     }
589
590     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
591     {
592         ASSERT(term.type == ByteTerm::TypeCharacterClass);
593         BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
594
595         switch (term.atom.quantityType) {
596         case QuantifierFixedCount:
597             if (unicode)
598                 input.setPos(backTrack->begin);
599             break;
600
601         case QuantifierGreedy:
602             if (backTrack->matchAmount) {
603                 if (unicode) {
604                     // Rematch one less match
605                     input.setPos(backTrack->begin);
606                     --backTrack->matchAmount;
607                     for (unsigned matchAmount = 0; (matchAmount < backTrack->matchAmount) && input.checkInput(1); ++matchAmount) {
608                         if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
609                             input.uncheckInput(1);
610                             break;
611                         }
612                     }
613                     return true;
614                 }
615                 --backTrack->matchAmount;
616                 input.uncheckInput(1);
617                 return true;
618             }
619             break;
620
621         case QuantifierNonGreedy:
622             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
623                 ++backTrack->matchAmount;
624                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
625                     return true;
626             }
627             input.setPos(backTrack->begin);
628             break;
629         }
630
631         return false;
632     }
633
634     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
635     {
636         ASSERT(term.type == ByteTerm::TypeBackReference);
637         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
638
639         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
640         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
641
642         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
643         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
644         // Eg.: /(a\1)/
645         if (matchEnd == offsetNoMatch)
646             return true;
647
648         if (matchBegin == offsetNoMatch)
649             return true;
650
651         ASSERT(matchBegin <= matchEnd);
652
653         if (matchBegin == matchEnd)
654             return true;
655
656         switch (term.atom.quantityType) {
657         case QuantifierFixedCount: {
658             backTrack->begin = input.getPos();
659             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
660                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
661                     input.setPos(backTrack->begin);
662                     return false;
663                 }
664             }
665             return true;
666         }
667
668         case QuantifierGreedy: {
669             unsigned matchAmount = 0;
670             while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
671                 ++matchAmount;
672             backTrack->matchAmount = matchAmount;
673             return true;
674         }
675
676         case QuantifierNonGreedy:
677             backTrack->begin = input.getPos();
678             backTrack->matchAmount = 0;
679             return true;
680         }
681
682         RELEASE_ASSERT_NOT_REACHED();
683         return false;
684     }
685
686     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
687     {
688         ASSERT(term.type == ByteTerm::TypeBackReference);
689         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
690
691         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
692         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
693
694         if (matchBegin == offsetNoMatch)
695             return false;
696
697         ASSERT(matchBegin <= matchEnd);
698
699         if (matchBegin == matchEnd)
700             return false;
701
702         switch (term.atom.quantityType) {
703         case QuantifierFixedCount:
704             // for quantityMaxCount == 1, could rewind.
705             input.setPos(backTrack->begin);
706             break;
707
708         case QuantifierGreedy:
709             if (backTrack->matchAmount) {
710                 --backTrack->matchAmount;
711                 input.rewind(matchEnd - matchBegin);
712                 return true;
713             }
714             break;
715
716         case QuantifierNonGreedy:
717             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
718                 ++backTrack->matchAmount;
719                 return true;
720             }
721             input.setPos(backTrack->begin);
722             break;
723         }
724
725         return false;
726     }
727
728     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
729     {
730         if (term.capture()) {
731             unsigned subpatternId = term.atom.subpatternId;
732             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition;
733             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition;
734         }
735     }
736     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
737     {
738         unsigned firstSubpatternId = term.atom.subpatternId;
739         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
740         context->restoreOutput(output, firstSubpatternId, count);
741     }
742     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
743     {
744         while (backTrack->matchAmount) {
745             ParenthesesDisjunctionContext* context = backTrack->lastContext;
746
747             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
748             if (result == JSRegExpMatch)
749                 return JSRegExpMatch;
750
751             resetMatches(term, context);
752             popParenthesesDisjunctionContext(backTrack);
753             freeParenthesesDisjunctionContext(context);
754
755             if (result != JSRegExpNoMatch)
756                 return result;
757         }
758
759         return JSRegExpNoMatch;
760     }
761
762     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
763     {
764         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
765         ASSERT(term.atom.quantityMaxCount == 1);
766
767         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
768
769         switch (term.atom.quantityType) {
770         case QuantifierGreedy: {
771             // set this speculatively; if we get to the parens end this will be true.
772             backTrack->begin = input.getPos();
773             break;
774         }
775         case QuantifierNonGreedy: {
776             backTrack->begin = notFound;
777             context->term += term.atom.parenthesesWidth;
778             return true;
779         }
780         case QuantifierFixedCount:
781             break;
782         }
783
784         if (term.capture()) {
785             unsigned subpatternId = term.atom.subpatternId;
786             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
787         }
788
789         return true;
790     }
791
792     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
793     {
794         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
795         ASSERT(term.atom.quantityMaxCount == 1);
796
797         if (term.capture()) {
798             unsigned subpatternId = term.atom.subpatternId;
799             output[(subpatternId << 1) + 1] = input.getPos() - term.inputPosition;
800         }
801
802         if (term.atom.quantityType == QuantifierFixedCount)
803             return true;
804
805         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
806         return backTrack->begin != input.getPos();
807     }
808
809     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
810     {
811         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
812         ASSERT(term.atom.quantityMaxCount == 1);
813
814         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
815
816         if (term.capture()) {
817             unsigned subpatternId = term.atom.subpatternId;
818             output[(subpatternId << 1)] = offsetNoMatch;
819             output[(subpatternId << 1) + 1] = offsetNoMatch;
820         }
821
822         switch (term.atom.quantityType) {
823         case QuantifierGreedy:
824             // if we backtrack to this point, there is another chance - try matching nothing.
825             ASSERT(backTrack->begin != notFound);
826             backTrack->begin = notFound;
827             context->term += term.atom.parenthesesWidth;
828             return true;
829         case QuantifierNonGreedy:
830             ASSERT(backTrack->begin != notFound);
831             FALLTHROUGH;
832         case QuantifierFixedCount:
833             break;
834         }
835
836         return false;
837     }
838
839     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
840     {
841         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
842         ASSERT(term.atom.quantityMaxCount == 1);
843
844         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
845
846         switch (term.atom.quantityType) {
847         case QuantifierGreedy:
848             if (backTrack->begin == notFound) {
849                 context->term -= term.atom.parenthesesWidth;
850                 return false;
851             }
852             FALLTHROUGH;
853         case QuantifierNonGreedy:
854             if (backTrack->begin == notFound) {
855                 backTrack->begin = input.getPos();
856                 if (term.capture()) {
857                     // Technically this access to inputPosition should be accessing the begin term's
858                     // inputPosition, but for repeats other than fixed these values should be
859                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
860                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
861                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
862                     unsigned subpatternId = term.atom.subpatternId;
863                     output[subpatternId << 1] = input.getPos() - term.inputPosition;
864                 }
865                 context->term -= term.atom.parenthesesWidth;
866                 return true;
867             }
868             FALLTHROUGH;
869         case QuantifierFixedCount:
870             break;
871         }
872
873         return false;
874     }
875
876     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
877     {
878         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
879         ASSERT(term.atom.quantityType == QuantifierGreedy);
880         ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
881         ASSERT(!term.capture());
882
883         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
884         backTrack->begin = input.getPos();
885         return true;
886     }
887
888     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
889     {
890         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
891
892         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
893         // Empty match is a failed match.
894         if (backTrack->begin == input.getPos())
895             return false;
896
897         // Successful match! Okay, what's next? - loop around and try to match more!
898         context->term -= (term.atom.parenthesesWidth + 1);
899         return true;
900     }
901
902     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
903     {
904         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
905         ASSERT(term.atom.quantityType == QuantifierGreedy);
906         ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
907         ASSERT(!term.capture());
908
909         // If we backtrack to this point, we have failed to match this iteration of the parens.
910         // Since this is greedy / zero minimum a failed is also accepted as a match!
911         context->term += term.atom.parenthesesWidth;
912         return true;
913     }
914
915     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
916     {
917         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
918         // should always be returned as a successful match - we should never backtrack to here.
919         RELEASE_ASSERT_NOT_REACHED();
920         return false;
921     }
922
923     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
924     {
925         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
926         ASSERT(term.atom.quantityMaxCount == 1);
927
928         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
929
930         backTrack->begin = input.getPos();
931         return true;
932     }
933
934     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
935     {
936         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
937         ASSERT(term.atom.quantityMaxCount == 1);
938
939         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
940
941         input.setPos(backTrack->begin);
942
943         // We've reached the end of the parens; if they are inverted, this is failure.
944         if (term.invert()) {
945             context->term -= term.atom.parenthesesWidth;
946             return false;
947         }
948
949         return true;
950     }
951
952     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
953     {
954         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
955         ASSERT(term.atom.quantityMaxCount == 1);
956
957         // We've failed to match parens; if they are inverted, this is win!
958         if (term.invert()) {
959             context->term += term.atom.parenthesesWidth;
960             return true;
961         }
962
963         return false;
964     }
965
966     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
967     {
968         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
969         ASSERT(term.atom.quantityMaxCount == 1);
970
971         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
972
973         input.setPos(backTrack->begin);
974
975         context->term -= term.atom.parenthesesWidth;
976         return false;
977     }
978
979     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
980     {
981         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
982
983         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
984         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
985
986         backTrack->matchAmount = 0;
987         backTrack->lastContext = 0;
988
989         ASSERT(term.atom.quantityType != QuantifierFixedCount || term.atom.quantityMinCount == term.atom.quantityMaxCount);
990
991         unsigned minimumMatchCount = term.atom.quantityMinCount;
992         JSRegExpResult fixedMatchResult;
993
994         // Handle fixed matches and the minimum part of a variable length match.
995         if (minimumMatchCount) {
996             // While we haven't yet reached our fixed limit,
997             while (backTrack->matchAmount < minimumMatchCount) {
998                 // Try to do a match, and it it succeeds, add it to the list.
999                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1000                 fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1001                 if (fixedMatchResult == JSRegExpMatch)
1002                     appendParenthesesDisjunctionContext(backTrack, context);
1003                 else {
1004                     // The match failed; try to find an alternate point to carry on from.
1005                     resetMatches(term, context);
1006                     freeParenthesesDisjunctionContext(context);
1007                     
1008                     if (fixedMatchResult != JSRegExpNoMatch)
1009                         return fixedMatchResult;
1010                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1011                     if (backtrackResult != JSRegExpMatch)
1012                         return backtrackResult;
1013                 }
1014             }
1015
1016             ParenthesesDisjunctionContext* context = backTrack->lastContext;
1017             recordParenthesesMatch(term, context);
1018         }
1019
1020         switch (term.atom.quantityType) {
1021         case QuantifierFixedCount: {
1022             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1023             return JSRegExpMatch;
1024         }
1025
1026         case QuantifierGreedy: {
1027             while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1028                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1029                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1030                 if (result == JSRegExpMatch)
1031                     appendParenthesesDisjunctionContext(backTrack, context);
1032                 else {
1033                     resetMatches(term, context);
1034                     freeParenthesesDisjunctionContext(context);
1035
1036                     if (result != JSRegExpNoMatch)
1037                         return result;
1038
1039                     break;
1040                 }
1041             }
1042
1043             if (backTrack->matchAmount) {
1044                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1045                 recordParenthesesMatch(term, context);
1046             }
1047             return JSRegExpMatch;
1048         }
1049
1050         case QuantifierNonGreedy:
1051             return JSRegExpMatch;
1052         }
1053
1054         RELEASE_ASSERT_NOT_REACHED();
1055         return JSRegExpErrorNoMatch;
1056     }
1057
1058     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
1059     //
1060     // Greedy matches never should try just adding more - you should already have done
1061     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
1062     // you backtrack an item off the list needs checking, since we'll never have matched
1063     // the one less case.  Tracking forwards, still add as much as possible.
1064     //
1065     // Non-greedy, we've already done the one less case, so don't match on popping.
1066     // We haven't done the one more case, so always try to add that.
1067     //
1068     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
1069     {
1070         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1071
1072         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1073         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1074
1075         switch (term.atom.quantityType) {
1076         case QuantifierFixedCount: {
1077             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1078
1079             ParenthesesDisjunctionContext* context = 0;
1080             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
1081
1082             if (result != JSRegExpMatch)
1083                 return result;
1084
1085             // While we haven't yet reached our fixed limit,
1086             while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1087                 // Try to do a match, and it it succeeds, add it to the list.
1088                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1089                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1090
1091                 if (result == JSRegExpMatch)
1092                     appendParenthesesDisjunctionContext(backTrack, context);
1093                 else {
1094                     // The match failed; try to find an alternate point to carry on from.
1095                     resetMatches(term, context);
1096                     freeParenthesesDisjunctionContext(context);
1097
1098                     if (result != JSRegExpNoMatch)
1099                         return result;
1100                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1101                     if (backtrackResult != JSRegExpMatch)
1102                         return backtrackResult;
1103                 }
1104             }
1105
1106             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1107             context = backTrack->lastContext;
1108             recordParenthesesMatch(term, context);
1109             return JSRegExpMatch;
1110         }
1111
1112         case QuantifierGreedy: {
1113             if (!backTrack->matchAmount)
1114                 return JSRegExpNoMatch;
1115
1116             ParenthesesDisjunctionContext* context = backTrack->lastContext;
1117             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1118             if (result == JSRegExpMatch) {
1119                 while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1120                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1121                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1122                     if (parenthesesResult == JSRegExpMatch)
1123                         appendParenthesesDisjunctionContext(backTrack, context);
1124                     else {
1125                         resetMatches(term, context);
1126                         freeParenthesesDisjunctionContext(context);
1127
1128                         if (parenthesesResult != JSRegExpNoMatch)
1129                             return parenthesesResult;
1130
1131                         break;
1132                     }
1133                 }
1134             } else {
1135                 resetMatches(term, context);
1136                 popParenthesesDisjunctionContext(backTrack);
1137                 freeParenthesesDisjunctionContext(context);
1138
1139                 if (result != JSRegExpNoMatch || backTrack->matchAmount < term.atom.quantityMinCount)
1140                     return result;
1141             }
1142
1143             if (backTrack->matchAmount) {
1144                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1145                 recordParenthesesMatch(term, context);
1146             }
1147             return JSRegExpMatch;
1148         }
1149
1150         case QuantifierNonGreedy: {
1151             // If we've not reached the limit, try to add one more match.
1152             if (backTrack->matchAmount < term.atom.quantityMaxCount) {
1153                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1154                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1155                 if (result == JSRegExpMatch) {
1156                     appendParenthesesDisjunctionContext(backTrack, context);
1157                     recordParenthesesMatch(term, context);
1158                     return JSRegExpMatch;
1159                 }
1160
1161                 resetMatches(term, context);
1162                 freeParenthesesDisjunctionContext(context);
1163
1164                 if (result != JSRegExpNoMatch)
1165                     return result;
1166             }
1167
1168             // Nope - okay backtrack looking for an alternative.
1169             while (backTrack->matchAmount) {
1170                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1171                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1172                 if (result == JSRegExpMatch) {
1173                     // successful backtrack! we're back in the game!
1174                     if (backTrack->matchAmount) {
1175                         context = backTrack->lastContext;
1176                         recordParenthesesMatch(term, context);
1177                     }
1178                     return JSRegExpMatch;
1179                 }
1180
1181                 // pop a match off the stack
1182                 resetMatches(term, context);
1183                 popParenthesesDisjunctionContext(backTrack);
1184                 freeParenthesesDisjunctionContext(context);
1185
1186                 if (result != JSRegExpNoMatch)
1187                     return result;
1188             }
1189
1190             return JSRegExpNoMatch;
1191         }
1192         }
1193
1194         RELEASE_ASSERT_NOT_REACHED();
1195         return JSRegExpErrorNoMatch;
1196     }
1197
1198     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1199     {
1200         UNUSED_PARAM(term);
1201
1202         if (pattern->dotAll()) {
1203             context->matchBegin = startOffset;
1204             context->matchEnd = input.end();
1205             return true;
1206         }
1207
1208         unsigned matchBegin = context->matchBegin;
1209
1210         if (matchBegin > startOffset) {
1211             for (matchBegin--; true; matchBegin--) {
1212                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1213                     ++matchBegin;
1214                     break;
1215                 }
1216
1217                 if (matchBegin == startOffset)
1218                     break;
1219             }
1220         }
1221
1222         unsigned matchEnd = input.getPos();
1223
1224         for (; (matchEnd != input.end())
1225              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1226
1227         if (((matchBegin && term.anchors.m_bol)
1228              || ((matchEnd != input.end()) && term.anchors.m_eol))
1229             && !pattern->multiline())
1230             return false;
1231
1232         context->matchBegin = matchBegin;
1233         context->matchEnd = matchEnd;
1234         return true;
1235     }
1236
1237 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1238 #define BACKTRACK() { --context->term; goto backtrack; }
1239 #define currentTerm() (disjunction->terms[context->term])
1240     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1241     {
1242         if (!--remainingMatchCount)
1243             return JSRegExpErrorHitLimit;
1244
1245         if (btrack)
1246             BACKTRACK();
1247
1248         context->matchBegin = input.getPos();
1249         context->term = 0;
1250
1251     matchAgain:
1252         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1253
1254         switch (currentTerm().type) {
1255         case ByteTerm::TypeSubpatternBegin:
1256             MATCH_NEXT();
1257         case ByteTerm::TypeSubpatternEnd:
1258             context->matchEnd = input.getPos();
1259             return JSRegExpMatch;
1260
1261         case ByteTerm::TypeBodyAlternativeBegin:
1262             MATCH_NEXT();
1263         case ByteTerm::TypeBodyAlternativeDisjunction:
1264         case ByteTerm::TypeBodyAlternativeEnd:
1265             context->matchEnd = input.getPos();
1266             return JSRegExpMatch;
1267
1268         case ByteTerm::TypeAlternativeBegin:
1269             MATCH_NEXT();
1270         case ByteTerm::TypeAlternativeDisjunction:
1271         case ByteTerm::TypeAlternativeEnd: {
1272             int offset = currentTerm().alternative.end;
1273             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1274             backTrack->offset = offset;
1275             context->term += offset;
1276             MATCH_NEXT();
1277         }
1278
1279         case ByteTerm::TypeAssertionBOL:
1280             if (matchAssertionBOL(currentTerm()))
1281                 MATCH_NEXT();
1282             BACKTRACK();
1283         case ByteTerm::TypeAssertionEOL:
1284             if (matchAssertionEOL(currentTerm()))
1285                 MATCH_NEXT();
1286             BACKTRACK();
1287         case ByteTerm::TypeAssertionWordBoundary:
1288             if (matchAssertionWordBoundary(currentTerm()))
1289                 MATCH_NEXT();
1290             BACKTRACK();
1291
1292         case ByteTerm::TypePatternCharacterOnce:
1293         case ByteTerm::TypePatternCharacterFixed: {
1294             if (unicode) {
1295                 if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
1296                     for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1297                         if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 2 * matchAmount)) {
1298                             BACKTRACK();
1299                         }
1300                     }
1301                     MATCH_NEXT();
1302                 }
1303             }
1304             unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1305
1306             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1307                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
1308                     input.setPos(position);
1309                     BACKTRACK();
1310                 }
1311             }
1312             MATCH_NEXT();
1313         }
1314         case ByteTerm::TypePatternCharacterGreedy: {
1315             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1316             unsigned matchAmount = 0;
1317             unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1318             while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) {
1319                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1320                     input.setPos(position);
1321                     break;
1322                 }
1323                 ++matchAmount;
1324                 position = input.getPos();
1325             }
1326             backTrack->matchAmount = matchAmount;
1327
1328             MATCH_NEXT();
1329         }
1330         case ByteTerm::TypePatternCharacterNonGreedy: {
1331             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1332             backTrack->begin = input.getPos();
1333             backTrack->matchAmount = 0;
1334             MATCH_NEXT();
1335         }
1336
1337         case ByteTerm::TypePatternCasedCharacterOnce:
1338         case ByteTerm::TypePatternCasedCharacterFixed: {
1339             if (unicode) {
1340                 // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1341                 ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
1342
1343                 unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1344                 
1345                 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1346                     if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
1347                         input.setPos(position);
1348                         BACKTRACK();
1349                     }
1350                 }
1351                 MATCH_NEXT();
1352             }
1353
1354             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1355                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1356                     BACKTRACK();
1357             }
1358             MATCH_NEXT();
1359         }
1360         case ByteTerm::TypePatternCasedCharacterGreedy: {
1361             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1362
1363             // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1364             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
1365
1366             unsigned matchAmount = 0;
1367             while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) {
1368                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1369                     input.uncheckInput(1);
1370                     break;
1371                 }
1372                 ++matchAmount;
1373             }
1374             backTrack->matchAmount = matchAmount;
1375
1376             MATCH_NEXT();
1377         }
1378         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1379             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1380
1381             // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1382             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
1383             
1384             backTrack->matchAmount = 0;
1385             MATCH_NEXT();
1386         }
1387
1388         case ByteTerm::TypeCharacterClass:
1389             if (matchCharacterClass(currentTerm(), context))
1390                 MATCH_NEXT();
1391             BACKTRACK();
1392         case ByteTerm::TypeBackReference:
1393             if (matchBackReference(currentTerm(), context))
1394                 MATCH_NEXT();
1395             BACKTRACK();
1396         case ByteTerm::TypeParenthesesSubpattern: {
1397             JSRegExpResult result = matchParentheses(currentTerm(), context);
1398
1399             if (result == JSRegExpMatch) {
1400                 MATCH_NEXT();
1401             }  else if (result != JSRegExpNoMatch)
1402                 return result;
1403
1404             BACKTRACK();
1405         }
1406         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1407             if (matchParenthesesOnceBegin(currentTerm(), context))
1408                 MATCH_NEXT();
1409             BACKTRACK();
1410         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1411             if (matchParenthesesOnceEnd(currentTerm(), context))
1412                 MATCH_NEXT();
1413             BACKTRACK();
1414         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1415             if (matchParenthesesTerminalBegin(currentTerm(), context))
1416                 MATCH_NEXT();
1417             BACKTRACK();
1418         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1419             if (matchParenthesesTerminalEnd(currentTerm(), context))
1420                 MATCH_NEXT();
1421             BACKTRACK();
1422         case ByteTerm::TypeParentheticalAssertionBegin:
1423             if (matchParentheticalAssertionBegin(currentTerm(), context))
1424                 MATCH_NEXT();
1425             BACKTRACK();
1426         case ByteTerm::TypeParentheticalAssertionEnd:
1427             if (matchParentheticalAssertionEnd(currentTerm(), context))
1428                 MATCH_NEXT();
1429             BACKTRACK();
1430
1431         case ByteTerm::TypeCheckInput:
1432             if (input.checkInput(currentTerm().checkInputCount))
1433                 MATCH_NEXT();
1434             BACKTRACK();
1435
1436         case ByteTerm::TypeUncheckInput:
1437             input.uncheckInput(currentTerm().checkInputCount);
1438             MATCH_NEXT();
1439                 
1440         case ByteTerm::TypeDotStarEnclosure:
1441             if (matchDotStarEnclosure(currentTerm(), context))
1442                 return JSRegExpMatch;
1443             BACKTRACK();
1444         }
1445
1446         // We should never fall-through to here.
1447         RELEASE_ASSERT_NOT_REACHED();
1448
1449     backtrack:
1450         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1451
1452         switch (currentTerm().type) {
1453         case ByteTerm::TypeSubpatternBegin:
1454             return JSRegExpNoMatch;
1455         case ByteTerm::TypeSubpatternEnd:
1456             RELEASE_ASSERT_NOT_REACHED();
1457
1458         case ByteTerm::TypeBodyAlternativeBegin:
1459         case ByteTerm::TypeBodyAlternativeDisjunction: {
1460             int offset = currentTerm().alternative.next;
1461             context->term += offset;
1462             if (offset > 0)
1463                 MATCH_NEXT();
1464
1465             if (input.atEnd() || pattern->sticky())
1466                 return JSRegExpNoMatch;
1467
1468             input.next();
1469
1470             context->matchBegin = input.getPos();
1471
1472             if (currentTerm().alternative.onceThrough)
1473                 context->term += currentTerm().alternative.next;
1474
1475             MATCH_NEXT();
1476         }
1477         case ByteTerm::TypeBodyAlternativeEnd:
1478             RELEASE_ASSERT_NOT_REACHED();
1479
1480         case ByteTerm::TypeAlternativeBegin:
1481         case ByteTerm::TypeAlternativeDisjunction: {
1482             int offset = currentTerm().alternative.next;
1483             context->term += offset;
1484             if (offset > 0)
1485                 MATCH_NEXT();
1486             BACKTRACK();
1487         }
1488         case ByteTerm::TypeAlternativeEnd: {
1489             // We should never backtrack back into an alternative of the main body of the regex.
1490             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1491             unsigned offset = backTrack->offset;
1492             context->term -= offset;
1493             BACKTRACK();
1494         }
1495
1496         case ByteTerm::TypeAssertionBOL:
1497         case ByteTerm::TypeAssertionEOL:
1498         case ByteTerm::TypeAssertionWordBoundary:
1499             BACKTRACK();
1500
1501         case ByteTerm::TypePatternCharacterOnce:
1502         case ByteTerm::TypePatternCharacterFixed:
1503         case ByteTerm::TypePatternCharacterGreedy:
1504         case ByteTerm::TypePatternCharacterNonGreedy:
1505             if (backtrackPatternCharacter(currentTerm(), context))
1506                 MATCH_NEXT();
1507             BACKTRACK();
1508         case ByteTerm::TypePatternCasedCharacterOnce:
1509         case ByteTerm::TypePatternCasedCharacterFixed:
1510         case ByteTerm::TypePatternCasedCharacterGreedy:
1511         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1512             if (backtrackPatternCasedCharacter(currentTerm(), context))
1513                 MATCH_NEXT();
1514             BACKTRACK();
1515         case ByteTerm::TypeCharacterClass:
1516             if (backtrackCharacterClass(currentTerm(), context))
1517                 MATCH_NEXT();
1518             BACKTRACK();
1519         case ByteTerm::TypeBackReference:
1520             if (backtrackBackReference(currentTerm(), context))
1521                 MATCH_NEXT();
1522             BACKTRACK();
1523         case ByteTerm::TypeParenthesesSubpattern: {
1524             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1525
1526             if (result == JSRegExpMatch) {
1527                 MATCH_NEXT();
1528             } else if (result != JSRegExpNoMatch)
1529                 return result;
1530
1531             BACKTRACK();
1532         }
1533         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1534             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1535                 MATCH_NEXT();
1536             BACKTRACK();
1537         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1538             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1539                 MATCH_NEXT();
1540             BACKTRACK();
1541         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1542             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1543                 MATCH_NEXT();
1544             BACKTRACK();
1545         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1546             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1547                 MATCH_NEXT();
1548             BACKTRACK();
1549         case ByteTerm::TypeParentheticalAssertionBegin:
1550             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1551                 MATCH_NEXT();
1552             BACKTRACK();
1553         case ByteTerm::TypeParentheticalAssertionEnd:
1554             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1555                 MATCH_NEXT();
1556             BACKTRACK();
1557
1558         case ByteTerm::TypeCheckInput:
1559             input.uncheckInput(currentTerm().checkInputCount);
1560             BACKTRACK();
1561
1562         case ByteTerm::TypeUncheckInput:
1563             input.checkInput(currentTerm().checkInputCount);
1564             BACKTRACK();
1565
1566         case ByteTerm::TypeDotStarEnclosure:
1567             RELEASE_ASSERT_NOT_REACHED();
1568         }
1569
1570         RELEASE_ASSERT_NOT_REACHED();
1571         return JSRegExpErrorNoMatch;
1572     }
1573
1574     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1575     {
1576         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1577
1578         if (result == JSRegExpMatch) {
1579             while (context->matchBegin == context->matchEnd) {
1580                 result = matchDisjunction(disjunction, context, true);
1581                 if (result != JSRegExpMatch)
1582                     return result;
1583             }
1584             return JSRegExpMatch;
1585         }
1586
1587         return result;
1588     }
1589
1590     unsigned interpret()
1591     {
1592         if (!input.isAvailableInput(0))
1593             return offsetNoMatch;
1594
1595         if (pattern->m_lock)
1596             pattern->m_lock->lock();
1597         
1598         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1599             output[i << 1] = offsetNoMatch;
1600
1601         allocatorPool = pattern->m_allocator->startAllocator();
1602         RELEASE_ASSERT(allocatorPool);
1603
1604         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1605
1606         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1607         if (result == JSRegExpMatch) {
1608             output[0] = context->matchBegin;
1609             output[1] = context->matchEnd;
1610         }
1611
1612         freeDisjunctionContext(context);
1613
1614         pattern->m_allocator->stopAllocator();
1615
1616         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1617
1618         if (pattern->m_lock)
1619             pattern->m_lock->unlock();
1620         
1621         return output[0];
1622     }
1623
1624     Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1625         : pattern(pattern)
1626         , unicode(pattern->unicode())
1627         , output(output)
1628         , input(input, start, length, pattern->unicode())
1629         , allocatorPool(0)
1630         , startOffset(start)
1631         , remainingMatchCount(matchLimit)
1632     {
1633     }
1634
1635 private:
1636     BytecodePattern* pattern;
1637     bool unicode;
1638     unsigned* output;
1639     InputStream input;
1640     BumpPointerPool* allocatorPool;
1641     unsigned startOffset;
1642     unsigned remainingMatchCount;
1643 };
1644
1645 class ByteCompiler {
1646     struct ParenthesesStackEntry {
1647         unsigned beginTerm;
1648         unsigned savedAlternativeIndex;
1649         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1650             : beginTerm(beginTerm)
1651             , savedAlternativeIndex(savedAlternativeIndex)
1652         {
1653         }
1654     };
1655
1656 public:
1657     ByteCompiler(YarrPattern& pattern)
1658         : m_pattern(pattern)
1659     {
1660         m_currentAlternativeIndex = 0;
1661     }
1662
1663     std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
1664     {
1665         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1666         emitDisjunction(m_pattern.m_body);
1667         regexEnd();
1668
1669         return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
1670     }
1671
1672     void checkInput(unsigned count)
1673     {
1674         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1675     }
1676
1677     void uncheckInput(unsigned count)
1678     {
1679         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1680     }
1681     
1682     void assertionBOL(unsigned inputPosition)
1683     {
1684         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1685     }
1686
1687     void assertionEOL(unsigned inputPosition)
1688     {
1689         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1690     }
1691
1692     void assertionWordBoundary(bool invert, unsigned inputPosition)
1693     {
1694         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1695     }
1696
1697     void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1698     {
1699         if (m_pattern.ignoreCase()) {
1700             UChar32 lo = u_tolower(ch);
1701             UChar32 hi = u_toupper(ch);
1702
1703             if (lo != hi) {
1704                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType));
1705                 return;
1706             }
1707         }
1708
1709         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityMaxCount, quantityType));
1710     }
1711
1712     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1713     {
1714         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1715
1716         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1717         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1718         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1719     }
1720
1721     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1722     {
1723         ASSERT(subpatternId);
1724
1725         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1726
1727         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1728         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1729         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1730     }
1731
1732     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1733     {
1734         int beginTerm = m_bodyDisjunction->terms.size();
1735
1736         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1737         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1738         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1739         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1740
1741         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1742         m_currentAlternativeIndex = beginTerm + 1;
1743     }
1744
1745     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1746     {
1747         int beginTerm = m_bodyDisjunction->terms.size();
1748
1749         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1750         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1751         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1752         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1753
1754         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1755         m_currentAlternativeIndex = beginTerm + 1;
1756     }
1757
1758     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1759     {
1760         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1761         // then fix this up at the end! - simplifying this should make it much clearer.
1762         // https://bugs.webkit.org/show_bug.cgi?id=50136
1763
1764         int beginTerm = m_bodyDisjunction->terms.size();
1765
1766         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1767         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1768         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1769         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1770
1771         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1772         m_currentAlternativeIndex = beginTerm + 1;
1773     }
1774
1775     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1776     {
1777         int beginTerm = m_bodyDisjunction->terms.size();
1778
1779         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1780         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1781         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1782         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1783
1784         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1785         m_currentAlternativeIndex = beginTerm + 1;
1786     }
1787
1788     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1789     {
1790         unsigned beginTerm = popParenthesesStack();
1791         closeAlternative(beginTerm + 1);
1792         unsigned endTerm = m_bodyDisjunction->terms.size();
1793
1794         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1795
1796         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1797         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1798
1799         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1800         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1801         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1802         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1803
1804         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1805         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1806         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1807         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1808     }
1809
1810     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1811     {
1812         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1813     }
1814
1815     unsigned popParenthesesStack()
1816     {
1817         ASSERT(m_parenthesesStack.size());
1818         int stackEnd = m_parenthesesStack.size() - 1;
1819         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1820         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1821         m_parenthesesStack.shrink(stackEnd);
1822
1823         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1824         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1825
1826         return beginTerm;
1827     }
1828
1829 #ifndef NDEBUG
1830     void dumpDisjunction(ByteDisjunction* disjunction)
1831     {
1832         dataLogF("ByteDisjunction(%p):\n\t", disjunction);
1833         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1834             dataLogF("{ %d } ", disjunction->terms[i].type);
1835         dataLogF("\n");
1836     }
1837 #endif
1838
1839     void closeAlternative(int beginTerm)
1840     {
1841         int origBeginTerm = beginTerm;
1842         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1843         int endIndex = m_bodyDisjunction->terms.size();
1844
1845         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1846
1847         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1848             m_bodyDisjunction->terms.remove(beginTerm);
1849         else {
1850             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1851                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1852                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1853                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1854                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1855             }
1856
1857             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1858
1859             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1860             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1861         }
1862     }
1863
1864     void closeBodyAlternative()
1865     {
1866         int beginTerm = 0;
1867         int origBeginTerm = 0;
1868         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1869         int endIndex = m_bodyDisjunction->terms.size();
1870
1871         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1872
1873         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1874             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1875             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1876             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1877             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1878         }
1879
1880         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1881
1882         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1883         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1884     }
1885
1886     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1887     {
1888         unsigned beginTerm = popParenthesesStack();
1889         closeAlternative(beginTerm + 1);
1890         unsigned endTerm = m_bodyDisjunction->terms.size();
1891
1892         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1893
1894         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1895
1896         bool capture = parenthesesBegin.capture();
1897         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1898
1899         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1900         auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
1901
1902         unsigned firstTermInParentheses = beginTerm + 1;
1903         parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
1904
1905         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1906         for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1907             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1908         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1909
1910         m_bodyDisjunction->terms.shrink(beginTerm);
1911
1912         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1913         m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
1914
1915         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1916         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1917         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1918         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1919     }
1920
1921     void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1922     {
1923         unsigned beginTerm = popParenthesesStack();
1924         closeAlternative(beginTerm + 1);
1925         unsigned endTerm = m_bodyDisjunction->terms.size();
1926
1927         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1928
1929         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1930         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1931
1932         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1933         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1934         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1935         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1936
1937         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1938         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1939         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1940         m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1941         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1942         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1943     }
1944
1945     void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1946     {
1947         unsigned beginTerm = popParenthesesStack();
1948         closeAlternative(beginTerm + 1);
1949         unsigned endTerm = m_bodyDisjunction->terms.size();
1950
1951         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1952
1953         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1954         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1955
1956         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1957         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1958         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1959         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1960
1961         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1962         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1963         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1964         m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1965         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1966         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1967     }
1968
1969     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1970     {
1971         m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
1972         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1973         m_bodyDisjunction->terms[0].frameLocation = 0;
1974         m_currentAlternativeIndex = 0;
1975     }
1976
1977     void regexEnd()
1978     {
1979         closeBodyAlternative();
1980     }
1981
1982     void alternativeBodyDisjunction(bool onceThrough)
1983     {
1984         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1985         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1986         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1987
1988         m_currentAlternativeIndex = newAlternativeIndex;
1989     }
1990
1991     void alternativeDisjunction()
1992     {
1993         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1994         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1995         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1996
1997         m_currentAlternativeIndex = newAlternativeIndex;
1998     }
1999
2000     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
2001     {
2002         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
2003             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
2004
2005             PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
2006
2007             if (alt) {
2008                 if (disjunction == m_pattern.m_body)
2009                     alternativeBodyDisjunction(alternative->onceThrough());
2010                 else
2011                     alternativeDisjunction();
2012             }
2013
2014             unsigned minimumSize = alternative->m_minimumSize;
2015             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
2016             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
2017
2018             if (countToCheck) {
2019                 checkInput(countToCheck);
2020                 currentCountAlreadyChecked += countToCheck;
2021             }
2022
2023             for (auto& term : alternative->m_terms) {
2024                 switch (term.type) {
2025                 case PatternTerm::TypeAssertionBOL:
2026                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
2027                     break;
2028
2029                 case PatternTerm::TypeAssertionEOL:
2030                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
2031                     break;
2032
2033                 case PatternTerm::TypeAssertionWordBoundary:
2034                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
2035                     break;
2036
2037                 case PatternTerm::TypePatternCharacter:
2038                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2039                     break;
2040
2041                 case PatternTerm::TypeCharacterClass:
2042                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2043                     break;
2044
2045                 case PatternTerm::TypeBackReference:
2046                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2047                         break;
2048
2049                 case PatternTerm::TypeForwardReference:
2050                     break;
2051
2052                 case PatternTerm::TypeParenthesesSubpattern: {
2053                     unsigned disjunctionAlreadyCheckedCount = 0;
2054                     if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) {
2055                         unsigned alternativeFrameLocation = term.frameLocation;
2056                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
2057                         if (term.quantityType == QuantifierFixedCount)
2058                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
2059                         else
2060                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2061                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2062                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2063                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
2064                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
2065                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2066                     } else if (term.parentheses.isTerminal) {
2067                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2068                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2069                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
2070                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
2071                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2072                     } else {
2073                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2074                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2075                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, 0);
2076                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
2077                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
2078                     }
2079                     break;
2080                 }
2081
2082                 case PatternTerm::TypeParentheticalAssertion: {
2083                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
2084
2085                     ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2086                     unsigned positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
2087                     unsigned uncheckAmount = 0;
2088                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
2089                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
2090                         uncheckInput(uncheckAmount);
2091                         currentCountAlreadyChecked -= uncheckAmount;
2092                     }
2093
2094                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
2095                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
2096                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityMaxCount, term.quantityType);
2097                     if (uncheckAmount) {
2098                         checkInput(uncheckAmount);
2099                         currentCountAlreadyChecked += uncheckAmount;
2100                     }
2101                     break;
2102                 }
2103
2104                 case PatternTerm::TypeDotStarEnclosure:
2105                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
2106                     break;
2107                 }
2108             }
2109         }
2110     }
2111
2112 private:
2113     YarrPattern& m_pattern;
2114     std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
2115     unsigned m_currentAlternativeIndex;
2116     Vector<ParenthesesStackEntry> m_parenthesesStack;
2117     Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
2118 };
2119
2120 std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
2121 {
2122     return ByteCompiler(pattern).compile(allocator, lock);
2123 }
2124
2125 unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
2126 {
2127     SuperSamplerScope superSamplerScope(false);
2128     if (input.is8Bit())
2129         return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
2130     return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
2131 }
2132
2133 unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
2134 {
2135     SuperSamplerScope superSamplerScope(false);
2136     return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
2137 }
2138
2139 unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
2140 {
2141     SuperSamplerScope superSamplerScope(false);
2142     return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
2143 }
2144
2145 // These should be the same for both UChar & LChar.
2146 COMPILE_ASSERT(sizeof(BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
2147 COMPILE_ASSERT(sizeof(BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
2148 COMPILE_ASSERT(sizeof(BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
2149 COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
2150 COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
2151 COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
2152 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
2153
2154
2155 } }