c4b77cadbe5eea573ec852b95ef6a789b828536e
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrInterpreter.cpp
1 /*
2  * Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "YarrInterpreter.h"
29
30 #include "Options.h"
31 #include "SuperSampler.h"
32 #include "Yarr.h"
33 #include "YarrCanonicalize.h"
34 #include <wtf/BumpPointerAllocator.h>
35 #include <wtf/CheckedArithmetic.h>
36 #include <wtf/DataLog.h>
37 #include <wtf/text/CString.h>
38 #include <wtf/text/WTFString.h>
39
40 namespace JSC { namespace Yarr {
41
42 template<typename CharType>
43 class Interpreter {
44 public:
45     struct ParenthesesDisjunctionContext;
46
47     struct BackTrackInfoParentheses {
48         uintptr_t matchAmount;
49         ParenthesesDisjunctionContext* lastContext;
50     };
51
52     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
53     {
54         context->next = backTrack->lastContext;
55         backTrack->lastContext = context;
56         ++backTrack->matchAmount;
57     }
58
59     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
60     {
61         RELEASE_ASSERT(backTrack->matchAmount);
62         RELEASE_ASSERT(backTrack->lastContext);
63         backTrack->lastContext = backTrack->lastContext->next;
64         --backTrack->matchAmount;
65     }
66
67     struct DisjunctionContext
68     {
69         DisjunctionContext() = default;
70
71         void* operator new(size_t, void* where)
72         {
73             return where;
74         }
75
76         static size_t allocationSize(unsigned numberOfFrames)
77         {
78             static_assert(alignof(DisjunctionContext) <= sizeof(void*), "");
79             size_t rawSize = (sizeof(DisjunctionContext) - sizeof(uintptr_t) + Checked<size_t>(numberOfFrames) * sizeof(uintptr_t)).unsafeGet();
80             size_t roundedSize = roundUpToMultipleOf<sizeof(void*)>(rawSize);
81             RELEASE_ASSERT(roundedSize >= rawSize);
82             return roundedSize;
83         }
84
85         int term { 0 };
86         unsigned matchBegin;
87         unsigned matchEnd;
88         uintptr_t frame[1];
89     };
90
91     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
92     {
93         size_t size = DisjunctionContext::allocationSize(disjunction->m_frameSize);
94         allocatorPool = allocatorPool->ensureCapacity(size);
95         RELEASE_ASSERT(allocatorPool);
96         return new (allocatorPool->alloc(size)) DisjunctionContext();
97     }
98
99     void freeDisjunctionContext(DisjunctionContext* context)
100     {
101         allocatorPool = allocatorPool->dealloc(context);
102     }
103
104     struct ParenthesesDisjunctionContext
105     {
106         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
107         {
108             unsigned firstSubpatternId = term.atom.subpatternId;
109             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
110
111             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
112                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
113                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
114             }
115
116             new (getDisjunctionContext(term)) DisjunctionContext();
117         }
118
119         void* operator new(size_t, void* where)
120         {
121             return where;
122         }
123
124         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
125         {
126             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
127                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
128         }
129
130         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
131         {
132             return bitwise_cast<DisjunctionContext*>(bitwise_cast<uintptr_t>(this) + allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns));
133         }
134
135         static size_t allocationSize(unsigned numberOfSubpatterns)
136         {
137             static_assert(alignof(ParenthesesDisjunctionContext) <= sizeof(void*), "");
138             size_t rawSize = (sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (Checked<size_t>(numberOfSubpatterns) * 2U) * sizeof(unsigned)).unsafeGet();
139             size_t roundedSize = roundUpToMultipleOf<sizeof(void*)>(rawSize);
140             RELEASE_ASSERT(roundedSize >= rawSize);
141             return roundedSize;
142         }
143
144         ParenthesesDisjunctionContext* next { nullptr };
145         unsigned subpatternBackup[1];
146     };
147
148     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
149     {
150         size_t size = (Checked<size_t>(ParenthesesDisjunctionContext::allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns)) + DisjunctionContext::allocationSize(disjunction->m_frameSize)).unsafeGet();
151         allocatorPool = allocatorPool->ensureCapacity(size);
152         RELEASE_ASSERT(allocatorPool);
153         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
154     }
155
156     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
157     {
158         allocatorPool = allocatorPool->dealloc(context);
159     }
160
161     class InputStream {
162     public:
163         InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
164             : input(input)
165             , pos(start)
166             , length(length)
167             , decodeSurrogatePairs(decodeSurrogatePairs)
168         {
169         }
170
171         void next()
172         {
173             ++pos;
174         }
175
176         void rewind(unsigned amount)
177         {
178             ASSERT(pos >= amount);
179             pos -= amount;
180         }
181
182         int read()
183         {
184             ASSERT(pos < length);
185             if (pos < length)
186                 return input[pos];
187             return -1;
188         }
189
190         int readPair()
191         {
192             ASSERT(pos + 1 < length);
193             return input[pos] | input[pos + 1] << 16;
194         }
195
196         int readChecked(unsigned negativePositionOffest)
197         {
198             RELEASE_ASSERT(pos >= negativePositionOffest);
199             unsigned p = pos - negativePositionOffest;
200             ASSERT(p < length);
201             int result = input[p];
202             if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
203                 if (atEnd())
204                     return -1;
205                 
206                 result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
207                 next();
208             }
209             return result;
210         }
211         
212         int readSurrogatePairChecked(unsigned negativePositionOffset)
213         {
214             RELEASE_ASSERT(pos >= negativePositionOffset);
215             unsigned p = pos - negativePositionOffset;
216             ASSERT(p < length);
217             if (p + 1 >= length)
218                 return -1;
219
220             int first = input[p];
221             int second = input[p + 1];
222             if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
223                 return U16_GET_SUPPLEMENTARY(first, second);
224
225             return -1;
226         }
227
228         int reread(unsigned from)
229         {
230             ASSERT(from < length);
231             int result = input[from];
232             if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1]))
233                 result = U16_GET_SUPPLEMENTARY(result, input[from + 1]);
234             return result;
235         }
236
237         int prev()
238         {
239             ASSERT(!(pos > length));
240             if (pos && length)
241                 return input[pos - 1];
242             return -1;
243         }
244
245         unsigned getPos()
246         {
247             return pos;
248         }
249
250         void setPos(unsigned p)
251         {
252             pos = p;
253         }
254
255         bool atStart()
256         {
257             return pos == 0;
258         }
259
260         bool atEnd()
261         {
262             return pos == length;
263         }
264
265         unsigned end()
266         {
267             return length;
268         }
269
270         bool checkInput(unsigned count)
271         {
272             if (((pos + count) <= length) && ((pos + count) >= pos)) {
273                 pos += count;
274                 return true;
275             }
276             return false;
277         }
278
279         void uncheckInput(unsigned count)
280         {
281             RELEASE_ASSERT(pos >= count);
282             pos -= count;
283         }
284
285         bool atStart(unsigned negativePositionOffset)
286         {
287             return pos == negativePositionOffset;
288         }
289
290         bool atEnd(unsigned negativePositionOffest)
291         {
292             RELEASE_ASSERT(pos >= negativePositionOffest);
293             return (pos - negativePositionOffest) == length;
294         }
295
296         bool isAvailableInput(unsigned offset)
297         {
298             return (((pos + offset) <= length) && ((pos + offset) >= pos));
299         }
300
301     private:
302         const CharType* input;
303         unsigned pos;
304         unsigned length;
305         bool decodeSurrogatePairs;
306     };
307
308     bool testCharacterClass(CharacterClass* characterClass, int ch)
309     {
310         auto linearSearchMatches = [&ch](const Vector<UChar32>& matches) {
311             for (unsigned i = 0; i < matches.size(); ++i) {
312                 if (ch == matches[i])
313                     return true;
314             }
315
316             return false;
317         };
318
319         auto binarySearchMatches = [&ch](const Vector<UChar32>& matches) {
320             size_t low = 0;
321             size_t high = matches.size() - 1;
322
323             while (low <= high) {
324                 size_t mid = low + (high - low) / 2;
325                 int diff = ch - matches[mid];
326                 if (!diff)
327                     return true;
328
329                 if (diff < 0) {
330                     if (mid == low)
331                         return false;
332                     high = mid - 1;
333                 } else
334                     low = mid + 1;
335             }
336             return false;
337         };
338
339         auto linearSearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
340             for (unsigned i = 0; i < ranges.size(); ++i) {
341                 if ((ch >= ranges[i].begin) && (ch <= ranges[i].end))
342                     return true;
343             }
344
345             return false;
346         };
347
348         auto binarySearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
349             size_t low = 0;
350             size_t high = ranges.size() - 1;
351
352             while (low <= high) {
353                 size_t mid = low + (high - low) / 2;
354                 int rangeBeginDiff = ch - ranges[mid].begin;
355                 if (rangeBeginDiff >= 0 && ch <= ranges[mid].end)
356                     return true;
357
358                 if (rangeBeginDiff < 0) {
359                     if (mid == low)
360                         return false;
361                     high = mid - 1;
362                 } else
363                     low = mid + 1;
364             }
365             return false;
366         };
367
368         if (characterClass->m_anyCharacter)
369             return true;
370
371         const size_t thresholdForBinarySearch = 6;
372
373         if (!isASCII(ch)) {
374             if (characterClass->m_matchesUnicode.size()) {
375                 if (characterClass->m_matchesUnicode.size() > thresholdForBinarySearch) {
376                     if (binarySearchMatches(characterClass->m_matchesUnicode))
377                         return true;
378                 } else if (linearSearchMatches(characterClass->m_matchesUnicode))
379                     return true;
380             }
381
382             if (characterClass->m_rangesUnicode.size()) {
383                 if (characterClass->m_rangesUnicode.size() > thresholdForBinarySearch) {
384                     if (binarySearchRanges(characterClass->m_rangesUnicode))
385                         return true;
386                 } else if (linearSearchRanges(characterClass->m_rangesUnicode))
387                     return true;
388             }
389         } else {
390             if (characterClass->m_matches.size()) {
391                 if (characterClass->m_matches.size() > thresholdForBinarySearch) {
392                     if (binarySearchMatches(characterClass->m_matches))
393                         return true;
394                 } else if (linearSearchMatches(characterClass->m_matches))
395                     return true;
396             }
397
398             if (characterClass->m_ranges.size()) {
399                 if (characterClass->m_ranges.size() > thresholdForBinarySearch) {
400                     if (binarySearchRanges(characterClass->m_ranges))
401                         return true;
402                 } else if (linearSearchRanges(characterClass->m_ranges))
403                     return true;
404             }
405         }
406
407         return false;
408     }
409
410     bool checkCharacter(int testChar, unsigned negativeInputOffset)
411     {
412         return testChar == input.readChecked(negativeInputOffset);
413     }
414
415     bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
416     {
417         return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
418     }
419
420     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
421     {
422         int ch = input.readChecked(negativeInputOffset);
423         return (loChar == ch) || (hiChar == ch);
424     }
425
426     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
427     {
428         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
429         return invert ? !match : match;
430     }
431     
432     bool checkCharacterClassDontAdvanceInputForNonBMP(CharacterClass* characterClass, unsigned negativeInputOffset)
433     {
434         int readCharacter = characterClass->hasOnlyNonBMPCharacters() ? input.readSurrogatePairChecked(negativeInputOffset) :  input.readChecked(negativeInputOffset);
435         return testCharacterClass(characterClass, readCharacter);
436     }
437
438     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
439     {
440         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
441
442         if (!input.checkInput(matchSize))
443             return false;
444
445         for (unsigned i = 0; i < matchSize; ++i) {
446             int oldCh = input.reread(matchBegin + i);
447             int ch;
448             if (!U_IS_BMP(oldCh)) {
449                 ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
450                 ++i;
451             } else
452                 ch = input.readChecked(negativeInputOffset + matchSize - i);
453
454             if (oldCh == ch)
455                 continue;
456
457             if (pattern->ignoreCase()) {
458                 // See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
459                 // patterns, Unicode values are never allowed to match against ASCII ones.
460                 // For Unicode, we need to check all canonical equivalents of a character.
461                 if (!unicode && (isASCII(oldCh) || isASCII(ch))) {
462                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
463                         continue;
464                 } else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
465                     continue;
466             }
467
468             input.uncheckInput(matchSize);
469             return false;
470         }
471
472         return true;
473     }
474
475     bool matchAssertionBOL(ByteTerm& term)
476     {
477         return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
478     }
479
480     bool matchAssertionEOL(ByteTerm& term)
481     {
482         if (term.inputPosition)
483             return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
484
485         return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
486     }
487
488     bool matchAssertionWordBoundary(ByteTerm& term)
489     {
490         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
491         bool readIsWordchar;
492         if (term.inputPosition)
493             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
494         else
495             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
496
497         bool wordBoundary = prevIsWordchar != readIsWordchar;
498         return term.invert() ? !wordBoundary : wordBoundary;
499     }
500
501     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
502     {
503         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
504
505         switch (term.atom.quantityType) {
506         case QuantifierFixedCount:
507             break;
508
509         case QuantifierGreedy:
510             if (backTrack->matchAmount) {
511                 --backTrack->matchAmount;
512                 input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
513                 return true;
514             }
515             break;
516
517         case QuantifierNonGreedy:
518             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
519                 ++backTrack->matchAmount;
520                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
521                     return true;
522             }
523             input.setPos(backTrack->begin);
524             break;
525         }
526
527         return false;
528     }
529
530     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
531     {
532         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
533
534         switch (term.atom.quantityType) {
535         case QuantifierFixedCount:
536             break;
537
538         case QuantifierGreedy:
539             if (backTrack->matchAmount) {
540                 --backTrack->matchAmount;
541                 input.uncheckInput(1);
542                 return true;
543             }
544             break;
545
546         case QuantifierNonGreedy:
547             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
548                 ++backTrack->matchAmount;
549                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
550                     return true;
551             }
552             input.uncheckInput(backTrack->matchAmount);
553             break;
554         }
555
556         return false;
557     }
558
559     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
560     {
561         ASSERT(term.type == ByteTerm::TypeCharacterClass);
562         BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
563
564         switch (term.atom.quantityType) {
565         case QuantifierFixedCount: {
566             if (unicode) {
567                 CharacterClass* charClass = term.atom.characterClass;
568                 backTrack->begin = input.getPos();
569                 unsigned matchAmount = 0;
570                 for (matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
571                     if (term.invert()) {
572                         if (!checkCharacterClass(charClass, term.invert(), term.inputPosition - matchAmount)) {
573                             input.setPos(backTrack->begin);
574                             return false;
575                         }
576                     } else {
577                         unsigned matchOffset = matchAmount * (charClass->hasOnlyNonBMPCharacters() ? 2 : 1);
578                         if (!checkCharacterClassDontAdvanceInputForNonBMP(charClass, term.inputPosition - matchOffset)) {
579                             input.setPos(backTrack->begin);
580                             return false;
581                         }
582                     }
583                 }
584
585                 return true;
586             }
587
588             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
589                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
590                     return false;
591             }
592             return true;
593         }
594
595         case QuantifierGreedy: {
596             unsigned position = input.getPos();
597             backTrack->begin = position;
598             unsigned matchAmount = 0;
599             while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
600                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
601                     input.setPos(position);
602                     break;
603                 }
604                 ++matchAmount;
605                 position = input.getPos();
606             }
607             backTrack->matchAmount = matchAmount;
608
609             return true;
610         }
611
612         case QuantifierNonGreedy:
613             backTrack->begin = input.getPos();
614             backTrack->matchAmount = 0;
615             return true;
616         }
617
618         RELEASE_ASSERT_NOT_REACHED();
619         return false;
620     }
621
622     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
623     {
624         ASSERT(term.type == ByteTerm::TypeCharacterClass);
625         BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
626
627         switch (term.atom.quantityType) {
628         case QuantifierFixedCount:
629             if (unicode)
630                 input.setPos(backTrack->begin);
631             break;
632
633         case QuantifierGreedy:
634             if (backTrack->matchAmount) {
635                 if (unicode) {
636                     // Rematch one less match
637                     input.setPos(backTrack->begin);
638                     --backTrack->matchAmount;
639                     for (unsigned matchAmount = 0; (matchAmount < backTrack->matchAmount) && input.checkInput(1); ++matchAmount) {
640                         if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
641                             input.uncheckInput(1);
642                             break;
643                         }
644                     }
645                     return true;
646                 }
647                 --backTrack->matchAmount;
648                 input.uncheckInput(1);
649                 return true;
650             }
651             break;
652
653         case QuantifierNonGreedy:
654             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
655                 ++backTrack->matchAmount;
656                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
657                     return true;
658             }
659             input.setPos(backTrack->begin);
660             break;
661         }
662
663         return false;
664     }
665
666     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
667     {
668         ASSERT(term.type == ByteTerm::TypeBackReference);
669         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
670
671         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
672         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
673
674         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
675         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
676         // Eg.: /(a\1)/
677         if (matchEnd == offsetNoMatch)
678             return true;
679
680         if (matchBegin == offsetNoMatch)
681             return true;
682
683         ASSERT(matchBegin <= matchEnd);
684
685         if (matchBegin == matchEnd)
686             return true;
687
688         switch (term.atom.quantityType) {
689         case QuantifierFixedCount: {
690             backTrack->begin = input.getPos();
691             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
692                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
693                     input.setPos(backTrack->begin);
694                     return false;
695                 }
696             }
697             return true;
698         }
699
700         case QuantifierGreedy: {
701             unsigned matchAmount = 0;
702             while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
703                 ++matchAmount;
704             backTrack->matchAmount = matchAmount;
705             return true;
706         }
707
708         case QuantifierNonGreedy:
709             backTrack->begin = input.getPos();
710             backTrack->matchAmount = 0;
711             return true;
712         }
713
714         RELEASE_ASSERT_NOT_REACHED();
715         return false;
716     }
717
718     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
719     {
720         ASSERT(term.type == ByteTerm::TypeBackReference);
721         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
722
723         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
724         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
725
726         if (matchBegin == offsetNoMatch)
727             return false;
728
729         ASSERT(matchBegin <= matchEnd);
730
731         if (matchBegin == matchEnd)
732             return false;
733
734         switch (term.atom.quantityType) {
735         case QuantifierFixedCount:
736             // for quantityMaxCount == 1, could rewind.
737             input.setPos(backTrack->begin);
738             break;
739
740         case QuantifierGreedy:
741             if (backTrack->matchAmount) {
742                 --backTrack->matchAmount;
743                 input.rewind(matchEnd - matchBegin);
744                 return true;
745             }
746             break;
747
748         case QuantifierNonGreedy:
749             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
750                 ++backTrack->matchAmount;
751                 return true;
752             }
753             input.setPos(backTrack->begin);
754             break;
755         }
756
757         return false;
758     }
759
760     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
761     {
762         if (term.capture()) {
763             unsigned subpatternId = term.atom.subpatternId;
764             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition;
765             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition;
766         }
767     }
768     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
769     {
770         unsigned firstSubpatternId = term.atom.subpatternId;
771         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
772         context->restoreOutput(output, firstSubpatternId, count);
773     }
774     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
775     {
776         while (backTrack->matchAmount) {
777             ParenthesesDisjunctionContext* context = backTrack->lastContext;
778
779             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
780             if (result == JSRegExpMatch)
781                 return JSRegExpMatch;
782
783             resetMatches(term, context);
784             popParenthesesDisjunctionContext(backTrack);
785             freeParenthesesDisjunctionContext(context);
786
787             if (result != JSRegExpNoMatch)
788                 return result;
789         }
790
791         return JSRegExpNoMatch;
792     }
793
794     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
795     {
796         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
797         ASSERT(term.atom.quantityMaxCount == 1);
798
799         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
800
801         switch (term.atom.quantityType) {
802         case QuantifierGreedy: {
803             // set this speculatively; if we get to the parens end this will be true.
804             backTrack->begin = input.getPos();
805             break;
806         }
807         case QuantifierNonGreedy: {
808             backTrack->begin = notFound;
809             context->term += term.atom.parenthesesWidth;
810             return true;
811         }
812         case QuantifierFixedCount:
813             break;
814         }
815
816         if (term.capture()) {
817             unsigned subpatternId = term.atom.subpatternId;
818             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
819         }
820
821         return true;
822     }
823
824     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
825     {
826         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
827         ASSERT(term.atom.quantityMaxCount == 1);
828
829         if (term.capture()) {
830             unsigned subpatternId = term.atom.subpatternId;
831             output[(subpatternId << 1) + 1] = input.getPos() - term.inputPosition;
832         }
833
834         if (term.atom.quantityType == QuantifierFixedCount)
835             return true;
836
837         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
838         return backTrack->begin != input.getPos();
839     }
840
841     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
842     {
843         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
844         ASSERT(term.atom.quantityMaxCount == 1);
845
846         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
847
848         if (term.capture()) {
849             unsigned subpatternId = term.atom.subpatternId;
850             output[(subpatternId << 1)] = offsetNoMatch;
851             output[(subpatternId << 1) + 1] = offsetNoMatch;
852         }
853
854         switch (term.atom.quantityType) {
855         case QuantifierGreedy:
856             // if we backtrack to this point, there is another chance - try matching nothing.
857             ASSERT(backTrack->begin != notFound);
858             backTrack->begin = notFound;
859             context->term += term.atom.parenthesesWidth;
860             return true;
861         case QuantifierNonGreedy:
862             ASSERT(backTrack->begin != notFound);
863             FALLTHROUGH;
864         case QuantifierFixedCount:
865             break;
866         }
867
868         return false;
869     }
870
871     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
872     {
873         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
874         ASSERT(term.atom.quantityMaxCount == 1);
875
876         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
877
878         switch (term.atom.quantityType) {
879         case QuantifierGreedy:
880             if (backTrack->begin == notFound) {
881                 context->term -= term.atom.parenthesesWidth;
882                 return false;
883             }
884             FALLTHROUGH;
885         case QuantifierNonGreedy:
886             if (backTrack->begin == notFound) {
887                 backTrack->begin = input.getPos();
888                 if (term.capture()) {
889                     // Technically this access to inputPosition should be accessing the begin term's
890                     // inputPosition, but for repeats other than fixed these values should be
891                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
892                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
893                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
894                     unsigned subpatternId = term.atom.subpatternId;
895                     output[subpatternId << 1] = input.getPos() - term.inputPosition;
896                 }
897                 context->term -= term.atom.parenthesesWidth;
898                 return true;
899             }
900             FALLTHROUGH;
901         case QuantifierFixedCount:
902             break;
903         }
904
905         return false;
906     }
907
908     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
909     {
910         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
911         ASSERT(term.atom.quantityType == QuantifierGreedy);
912         ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
913         ASSERT(!term.capture());
914
915         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
916         backTrack->begin = input.getPos();
917         return true;
918     }
919
920     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
921     {
922         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
923
924         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
925         // Empty match is a failed match.
926         if (backTrack->begin == input.getPos())
927             return false;
928
929         // Successful match! Okay, what's next? - loop around and try to match more!
930         context->term -= (term.atom.parenthesesWidth + 1);
931         return true;
932     }
933
934     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
935     {
936         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
937         ASSERT(term.atom.quantityType == QuantifierGreedy);
938         ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
939         ASSERT(!term.capture());
940
941         // If we backtrack to this point, we have failed to match this iteration of the parens.
942         // Since this is greedy / zero minimum a failed is also accepted as a match!
943         context->term += term.atom.parenthesesWidth;
944         return true;
945     }
946
947     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
948     {
949         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
950         // should always be returned as a successful match - we should never backtrack to here.
951         RELEASE_ASSERT_NOT_REACHED();
952         return false;
953     }
954
955     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
956     {
957         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
958         ASSERT(term.atom.quantityMaxCount == 1);
959
960         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
961
962         backTrack->begin = input.getPos();
963         return true;
964     }
965
966     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
967     {
968         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
969         ASSERT(term.atom.quantityMaxCount == 1);
970
971         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
972
973         input.setPos(backTrack->begin);
974
975         // We've reached the end of the parens; if they are inverted, this is failure.
976         if (term.invert()) {
977             context->term -= term.atom.parenthesesWidth;
978             return false;
979         }
980
981         return true;
982     }
983
984     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
985     {
986         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
987         ASSERT(term.atom.quantityMaxCount == 1);
988
989         // We've failed to match parens; if they are inverted, this is win!
990         if (term.invert()) {
991             context->term += term.atom.parenthesesWidth;
992             return true;
993         }
994
995         return false;
996     }
997
998     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
999     {
1000         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
1001         ASSERT(term.atom.quantityMaxCount == 1);
1002
1003         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
1004
1005         input.setPos(backTrack->begin);
1006
1007         context->term -= term.atom.parenthesesWidth;
1008         return false;
1009     }
1010
1011     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
1012     {
1013         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1014
1015         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1016         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1017
1018         backTrack->matchAmount = 0;
1019         backTrack->lastContext = 0;
1020
1021         ASSERT(term.atom.quantityType != QuantifierFixedCount || term.atom.quantityMinCount == term.atom.quantityMaxCount);
1022
1023         unsigned minimumMatchCount = term.atom.quantityMinCount;
1024         JSRegExpResult fixedMatchResult;
1025
1026         // Handle fixed matches and the minimum part of a variable length match.
1027         if (minimumMatchCount) {
1028             // While we haven't yet reached our fixed limit,
1029             while (backTrack->matchAmount < minimumMatchCount) {
1030                 // Try to do a match, and it it succeeds, add it to the list.
1031                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1032                 fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1033                 if (fixedMatchResult == JSRegExpMatch)
1034                     appendParenthesesDisjunctionContext(backTrack, context);
1035                 else {
1036                     // The match failed; try to find an alternate point to carry on from.
1037                     resetMatches(term, context);
1038                     freeParenthesesDisjunctionContext(context);
1039                     
1040                     if (fixedMatchResult != JSRegExpNoMatch)
1041                         return fixedMatchResult;
1042                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1043                     if (backtrackResult != JSRegExpMatch)
1044                         return backtrackResult;
1045                 }
1046             }
1047
1048             ParenthesesDisjunctionContext* context = backTrack->lastContext;
1049             recordParenthesesMatch(term, context);
1050         }
1051
1052         switch (term.atom.quantityType) {
1053         case QuantifierFixedCount: {
1054             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1055             return JSRegExpMatch;
1056         }
1057
1058         case QuantifierGreedy: {
1059             while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1060                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1061                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1062                 if (result == JSRegExpMatch)
1063                     appendParenthesesDisjunctionContext(backTrack, context);
1064                 else {
1065                     resetMatches(term, context);
1066                     freeParenthesesDisjunctionContext(context);
1067
1068                     if (result != JSRegExpNoMatch)
1069                         return result;
1070
1071                     break;
1072                 }
1073             }
1074
1075             if (backTrack->matchAmount) {
1076                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1077                 recordParenthesesMatch(term, context);
1078             }
1079             return JSRegExpMatch;
1080         }
1081
1082         case QuantifierNonGreedy:
1083             return JSRegExpMatch;
1084         }
1085
1086         RELEASE_ASSERT_NOT_REACHED();
1087         return JSRegExpErrorNoMatch;
1088     }
1089
1090     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
1091     //
1092     // Greedy matches never should try just adding more - you should already have done
1093     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
1094     // you backtrack an item off the list needs checking, since we'll never have matched
1095     // the one less case.  Tracking forwards, still add as much as possible.
1096     //
1097     // Non-greedy, we've already done the one less case, so don't match on popping.
1098     // We haven't done the one more case, so always try to add that.
1099     //
1100     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
1101     {
1102         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1103
1104         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1105         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1106
1107         switch (term.atom.quantityType) {
1108         case QuantifierFixedCount: {
1109             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1110
1111             ParenthesesDisjunctionContext* context = 0;
1112             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
1113
1114             if (result != JSRegExpMatch)
1115                 return result;
1116
1117             // While we haven't yet reached our fixed limit,
1118             while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1119                 // Try to do a match, and it it succeeds, add it to the list.
1120                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1121                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1122
1123                 if (result == JSRegExpMatch)
1124                     appendParenthesesDisjunctionContext(backTrack, context);
1125                 else {
1126                     // The match failed; try to find an alternate point to carry on from.
1127                     resetMatches(term, context);
1128                     freeParenthesesDisjunctionContext(context);
1129
1130                     if (result != JSRegExpNoMatch)
1131                         return result;
1132                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1133                     if (backtrackResult != JSRegExpMatch)
1134                         return backtrackResult;
1135                 }
1136             }
1137
1138             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1139             context = backTrack->lastContext;
1140             recordParenthesesMatch(term, context);
1141             return JSRegExpMatch;
1142         }
1143
1144         case QuantifierGreedy: {
1145             if (!backTrack->matchAmount)
1146                 return JSRegExpNoMatch;
1147
1148             ParenthesesDisjunctionContext* context = backTrack->lastContext;
1149             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1150             if (result == JSRegExpMatch) {
1151                 while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1152                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1153                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1154                     if (parenthesesResult == JSRegExpMatch)
1155                         appendParenthesesDisjunctionContext(backTrack, context);
1156                     else {
1157                         resetMatches(term, context);
1158                         freeParenthesesDisjunctionContext(context);
1159
1160                         if (parenthesesResult != JSRegExpNoMatch)
1161                             return parenthesesResult;
1162
1163                         break;
1164                     }
1165                 }
1166             } else {
1167                 resetMatches(term, context);
1168                 popParenthesesDisjunctionContext(backTrack);
1169                 freeParenthesesDisjunctionContext(context);
1170
1171                 if (result != JSRegExpNoMatch || backTrack->matchAmount < term.atom.quantityMinCount)
1172                     return result;
1173             }
1174
1175             if (backTrack->matchAmount) {
1176                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1177                 recordParenthesesMatch(term, context);
1178             }
1179             return JSRegExpMatch;
1180         }
1181
1182         case QuantifierNonGreedy: {
1183             // If we've not reached the limit, try to add one more match.
1184             if (backTrack->matchAmount < term.atom.quantityMaxCount) {
1185                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1186                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1187                 if (result == JSRegExpMatch) {
1188                     appendParenthesesDisjunctionContext(backTrack, context);
1189                     recordParenthesesMatch(term, context);
1190                     return JSRegExpMatch;
1191                 }
1192
1193                 resetMatches(term, context);
1194                 freeParenthesesDisjunctionContext(context);
1195
1196                 if (result != JSRegExpNoMatch)
1197                     return result;
1198             }
1199
1200             // Nope - okay backtrack looking for an alternative.
1201             while (backTrack->matchAmount) {
1202                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1203                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1204                 if (result == JSRegExpMatch) {
1205                     // successful backtrack! we're back in the game!
1206                     if (backTrack->matchAmount) {
1207                         context = backTrack->lastContext;
1208                         recordParenthesesMatch(term, context);
1209                     }
1210                     return JSRegExpMatch;
1211                 }
1212
1213                 // pop a match off the stack
1214                 resetMatches(term, context);
1215                 popParenthesesDisjunctionContext(backTrack);
1216                 freeParenthesesDisjunctionContext(context);
1217
1218                 if (result != JSRegExpNoMatch)
1219                     return result;
1220             }
1221
1222             return JSRegExpNoMatch;
1223         }
1224         }
1225
1226         RELEASE_ASSERT_NOT_REACHED();
1227         return JSRegExpErrorNoMatch;
1228     }
1229
1230     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1231     {
1232         UNUSED_PARAM(term);
1233
1234         if (pattern->dotAll()) {
1235             context->matchBegin = startOffset;
1236             context->matchEnd = input.end();
1237             return true;
1238         }
1239
1240         unsigned matchBegin = context->matchBegin;
1241
1242         if (matchBegin > startOffset) {
1243             for (matchBegin--; true; matchBegin--) {
1244                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1245                     ++matchBegin;
1246                     break;
1247                 }
1248
1249                 if (matchBegin == startOffset)
1250                     break;
1251             }
1252         }
1253
1254         unsigned matchEnd = input.getPos();
1255
1256         for (; (matchEnd != input.end())
1257              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1258
1259         if (((matchBegin && term.anchors.m_bol)
1260              || ((matchEnd != input.end()) && term.anchors.m_eol))
1261             && !pattern->multiline())
1262             return false;
1263
1264         context->matchBegin = matchBegin;
1265         context->matchEnd = matchEnd;
1266         return true;
1267     }
1268
1269 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1270 #define BACKTRACK() { --context->term; goto backtrack; }
1271 #define currentTerm() (disjunction->terms[context->term])
1272     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1273     {
1274         if (!--remainingMatchCount)
1275             return JSRegExpErrorHitLimit;
1276
1277         if (btrack)
1278             BACKTRACK();
1279
1280         context->matchBegin = input.getPos();
1281         context->term = 0;
1282
1283     matchAgain:
1284         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1285
1286         switch (currentTerm().type) {
1287         case ByteTerm::TypeSubpatternBegin:
1288             MATCH_NEXT();
1289         case ByteTerm::TypeSubpatternEnd:
1290             context->matchEnd = input.getPos();
1291             return JSRegExpMatch;
1292
1293         case ByteTerm::TypeBodyAlternativeBegin:
1294             MATCH_NEXT();
1295         case ByteTerm::TypeBodyAlternativeDisjunction:
1296         case ByteTerm::TypeBodyAlternativeEnd:
1297             context->matchEnd = input.getPos();
1298             return JSRegExpMatch;
1299
1300         case ByteTerm::TypeAlternativeBegin:
1301             MATCH_NEXT();
1302         case ByteTerm::TypeAlternativeDisjunction:
1303         case ByteTerm::TypeAlternativeEnd: {
1304             int offset = currentTerm().alternative.end;
1305             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1306             backTrack->offset = offset;
1307             context->term += offset;
1308             MATCH_NEXT();
1309         }
1310
1311         case ByteTerm::TypeAssertionBOL:
1312             if (matchAssertionBOL(currentTerm()))
1313                 MATCH_NEXT();
1314             BACKTRACK();
1315         case ByteTerm::TypeAssertionEOL:
1316             if (matchAssertionEOL(currentTerm()))
1317                 MATCH_NEXT();
1318             BACKTRACK();
1319         case ByteTerm::TypeAssertionWordBoundary:
1320             if (matchAssertionWordBoundary(currentTerm()))
1321                 MATCH_NEXT();
1322             BACKTRACK();
1323
1324         case ByteTerm::TypePatternCharacterOnce:
1325         case ByteTerm::TypePatternCharacterFixed: {
1326             if (unicode) {
1327                 if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
1328                     for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1329                         if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 2 * matchAmount)) {
1330                             BACKTRACK();
1331                         }
1332                     }
1333                     MATCH_NEXT();
1334                 }
1335             }
1336             unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1337
1338             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1339                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
1340                     input.setPos(position);
1341                     BACKTRACK();
1342                 }
1343             }
1344             MATCH_NEXT();
1345         }
1346         case ByteTerm::TypePatternCharacterGreedy: {
1347             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1348             unsigned matchAmount = 0;
1349             unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1350             while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) {
1351                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1352                     input.setPos(position);
1353                     break;
1354                 }
1355                 ++matchAmount;
1356                 position = input.getPos();
1357             }
1358             backTrack->matchAmount = matchAmount;
1359
1360             MATCH_NEXT();
1361         }
1362         case ByteTerm::TypePatternCharacterNonGreedy: {
1363             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1364             backTrack->begin = input.getPos();
1365             backTrack->matchAmount = 0;
1366             MATCH_NEXT();
1367         }
1368
1369         case ByteTerm::TypePatternCasedCharacterOnce:
1370         case ByteTerm::TypePatternCasedCharacterFixed: {
1371             if (unicode) {
1372                 // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1373                 ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
1374
1375                 unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1376                 
1377                 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1378                     if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
1379                         input.setPos(position);
1380                         BACKTRACK();
1381                     }
1382                 }
1383                 MATCH_NEXT();
1384             }
1385
1386             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1387                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1388                     BACKTRACK();
1389             }
1390             MATCH_NEXT();
1391         }
1392         case ByteTerm::TypePatternCasedCharacterGreedy: {
1393             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1394
1395             // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1396             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
1397
1398             unsigned matchAmount = 0;
1399             while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) {
1400                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1401                     input.uncheckInput(1);
1402                     break;
1403                 }
1404                 ++matchAmount;
1405             }
1406             backTrack->matchAmount = matchAmount;
1407
1408             MATCH_NEXT();
1409         }
1410         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1411             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1412
1413             // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1414             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
1415             
1416             backTrack->matchAmount = 0;
1417             MATCH_NEXT();
1418         }
1419
1420         case ByteTerm::TypeCharacterClass:
1421             if (matchCharacterClass(currentTerm(), context))
1422                 MATCH_NEXT();
1423             BACKTRACK();
1424         case ByteTerm::TypeBackReference:
1425             if (matchBackReference(currentTerm(), context))
1426                 MATCH_NEXT();
1427             BACKTRACK();
1428         case ByteTerm::TypeParenthesesSubpattern: {
1429             JSRegExpResult result = matchParentheses(currentTerm(), context);
1430
1431             if (result == JSRegExpMatch) {
1432                 MATCH_NEXT();
1433             }  else if (result != JSRegExpNoMatch)
1434                 return result;
1435
1436             BACKTRACK();
1437         }
1438         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1439             if (matchParenthesesOnceBegin(currentTerm(), context))
1440                 MATCH_NEXT();
1441             BACKTRACK();
1442         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1443             if (matchParenthesesOnceEnd(currentTerm(), context))
1444                 MATCH_NEXT();
1445             BACKTRACK();
1446         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1447             if (matchParenthesesTerminalBegin(currentTerm(), context))
1448                 MATCH_NEXT();
1449             BACKTRACK();
1450         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1451             if (matchParenthesesTerminalEnd(currentTerm(), context))
1452                 MATCH_NEXT();
1453             BACKTRACK();
1454         case ByteTerm::TypeParentheticalAssertionBegin:
1455             if (matchParentheticalAssertionBegin(currentTerm(), context))
1456                 MATCH_NEXT();
1457             BACKTRACK();
1458         case ByteTerm::TypeParentheticalAssertionEnd:
1459             if (matchParentheticalAssertionEnd(currentTerm(), context))
1460                 MATCH_NEXT();
1461             BACKTRACK();
1462
1463         case ByteTerm::TypeCheckInput:
1464             if (input.checkInput(currentTerm().checkInputCount))
1465                 MATCH_NEXT();
1466             BACKTRACK();
1467
1468         case ByteTerm::TypeUncheckInput:
1469             input.uncheckInput(currentTerm().checkInputCount);
1470             MATCH_NEXT();
1471                 
1472         case ByteTerm::TypeDotStarEnclosure:
1473             if (matchDotStarEnclosure(currentTerm(), context))
1474                 return JSRegExpMatch;
1475             BACKTRACK();
1476         }
1477
1478         // We should never fall-through to here.
1479         RELEASE_ASSERT_NOT_REACHED();
1480
1481     backtrack:
1482         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1483
1484         switch (currentTerm().type) {
1485         case ByteTerm::TypeSubpatternBegin:
1486             return JSRegExpNoMatch;
1487         case ByteTerm::TypeSubpatternEnd:
1488             RELEASE_ASSERT_NOT_REACHED();
1489
1490         case ByteTerm::TypeBodyAlternativeBegin:
1491         case ByteTerm::TypeBodyAlternativeDisjunction: {
1492             int offset = currentTerm().alternative.next;
1493             context->term += offset;
1494             if (offset > 0)
1495                 MATCH_NEXT();
1496
1497             if (input.atEnd() || pattern->sticky())
1498                 return JSRegExpNoMatch;
1499
1500             input.next();
1501
1502             context->matchBegin = input.getPos();
1503
1504             if (currentTerm().alternative.onceThrough)
1505                 context->term += currentTerm().alternative.next;
1506
1507             MATCH_NEXT();
1508         }
1509         case ByteTerm::TypeBodyAlternativeEnd:
1510             RELEASE_ASSERT_NOT_REACHED();
1511
1512         case ByteTerm::TypeAlternativeBegin:
1513         case ByteTerm::TypeAlternativeDisjunction: {
1514             int offset = currentTerm().alternative.next;
1515             context->term += offset;
1516             if (offset > 0)
1517                 MATCH_NEXT();
1518             BACKTRACK();
1519         }
1520         case ByteTerm::TypeAlternativeEnd: {
1521             // We should never backtrack back into an alternative of the main body of the regex.
1522             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1523             unsigned offset = backTrack->offset;
1524             context->term -= offset;
1525             BACKTRACK();
1526         }
1527
1528         case ByteTerm::TypeAssertionBOL:
1529         case ByteTerm::TypeAssertionEOL:
1530         case ByteTerm::TypeAssertionWordBoundary:
1531             BACKTRACK();
1532
1533         case ByteTerm::TypePatternCharacterOnce:
1534         case ByteTerm::TypePatternCharacterFixed:
1535         case ByteTerm::TypePatternCharacterGreedy:
1536         case ByteTerm::TypePatternCharacterNonGreedy:
1537             if (backtrackPatternCharacter(currentTerm(), context))
1538                 MATCH_NEXT();
1539             BACKTRACK();
1540         case ByteTerm::TypePatternCasedCharacterOnce:
1541         case ByteTerm::TypePatternCasedCharacterFixed:
1542         case ByteTerm::TypePatternCasedCharacterGreedy:
1543         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1544             if (backtrackPatternCasedCharacter(currentTerm(), context))
1545                 MATCH_NEXT();
1546             BACKTRACK();
1547         case ByteTerm::TypeCharacterClass:
1548             if (backtrackCharacterClass(currentTerm(), context))
1549                 MATCH_NEXT();
1550             BACKTRACK();
1551         case ByteTerm::TypeBackReference:
1552             if (backtrackBackReference(currentTerm(), context))
1553                 MATCH_NEXT();
1554             BACKTRACK();
1555         case ByteTerm::TypeParenthesesSubpattern: {
1556             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1557
1558             if (result == JSRegExpMatch) {
1559                 MATCH_NEXT();
1560             } else if (result != JSRegExpNoMatch)
1561                 return result;
1562
1563             BACKTRACK();
1564         }
1565         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1566             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1567                 MATCH_NEXT();
1568             BACKTRACK();
1569         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1570             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1571                 MATCH_NEXT();
1572             BACKTRACK();
1573         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1574             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1575                 MATCH_NEXT();
1576             BACKTRACK();
1577         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1578             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1579                 MATCH_NEXT();
1580             BACKTRACK();
1581         case ByteTerm::TypeParentheticalAssertionBegin:
1582             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1583                 MATCH_NEXT();
1584             BACKTRACK();
1585         case ByteTerm::TypeParentheticalAssertionEnd:
1586             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1587                 MATCH_NEXT();
1588             BACKTRACK();
1589
1590         case ByteTerm::TypeCheckInput:
1591             input.uncheckInput(currentTerm().checkInputCount);
1592             BACKTRACK();
1593
1594         case ByteTerm::TypeUncheckInput:
1595             input.checkInput(currentTerm().checkInputCount);
1596             BACKTRACK();
1597
1598         case ByteTerm::TypeDotStarEnclosure:
1599             RELEASE_ASSERT_NOT_REACHED();
1600         }
1601
1602         RELEASE_ASSERT_NOT_REACHED();
1603         return JSRegExpErrorNoMatch;
1604     }
1605
1606     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1607     {
1608         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1609
1610         if (result == JSRegExpMatch) {
1611             while (context->matchBegin == context->matchEnd) {
1612                 result = matchDisjunction(disjunction, context, true);
1613                 if (result != JSRegExpMatch)
1614                     return result;
1615             }
1616             return JSRegExpMatch;
1617         }
1618
1619         return result;
1620     }
1621
1622     unsigned interpret()
1623     {
1624         // FIXME: https://bugs.webkit.org/show_bug.cgi?id=195970
1625         // [Yarr Interpreter] The interpreter doesn't have checks for stack overflow due to deep recursion
1626         if (!input.isAvailableInput(0))
1627             return offsetNoMatch;
1628
1629         if (pattern->m_lock)
1630             pattern->m_lock->lock();
1631         
1632         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1633             output[i << 1] = offsetNoMatch;
1634
1635         allocatorPool = pattern->m_allocator->startAllocator();
1636         RELEASE_ASSERT(allocatorPool);
1637
1638         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1639
1640         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1641         if (result == JSRegExpMatch) {
1642             output[0] = context->matchBegin;
1643             output[1] = context->matchEnd;
1644         }
1645
1646         freeDisjunctionContext(context);
1647
1648         pattern->m_allocator->stopAllocator();
1649
1650         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1651
1652         if (pattern->m_lock)
1653             pattern->m_lock->unlock();
1654         
1655         return output[0];
1656     }
1657
1658     Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1659         : pattern(pattern)
1660         , unicode(pattern->unicode())
1661         , output(output)
1662         , input(input, start, length, pattern->unicode())
1663         , startOffset(start)
1664         , remainingMatchCount(matchLimit)
1665     {
1666     }
1667
1668 private:
1669     BytecodePattern* pattern;
1670     bool unicode;
1671     unsigned* output;
1672     InputStream input;
1673     WTF::BumpPointerPool* allocatorPool { nullptr };
1674     unsigned startOffset;
1675     unsigned remainingMatchCount;
1676 };
1677
1678 class ByteCompiler {
1679     struct ParenthesesStackEntry {
1680         unsigned beginTerm;
1681         unsigned savedAlternativeIndex;
1682         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1683             : beginTerm(beginTerm)
1684             , savedAlternativeIndex(savedAlternativeIndex)
1685         {
1686         }
1687     };
1688
1689 public:
1690     ByteCompiler(YarrPattern& pattern)
1691         : m_pattern(pattern)
1692     {
1693     }
1694
1695     std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock, ErrorCode& errorCode)
1696     {
1697         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1698         if (auto error = emitDisjunction(m_pattern.m_body, 0, 0)) {
1699             errorCode = error.value();
1700             return nullptr;
1701         }
1702         regexEnd();
1703
1704 #ifndef NDEBUG
1705         if (Options::dumpCompiledRegExpPatterns())
1706             dumpDisjunction(m_bodyDisjunction.get());
1707 #endif
1708
1709         return makeUnique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
1710     }
1711
1712     void checkInput(unsigned count)
1713     {
1714         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1715     }
1716
1717     void uncheckInput(unsigned count)
1718     {
1719         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1720     }
1721     
1722     void assertionBOL(unsigned inputPosition)
1723     {
1724         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1725     }
1726
1727     void assertionEOL(unsigned inputPosition)
1728     {
1729         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1730     }
1731
1732     void assertionWordBoundary(bool invert, unsigned inputPosition)
1733     {
1734         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1735     }
1736
1737     void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1738     {
1739         if (m_pattern.ignoreCase()) {
1740             UChar32 lo = u_tolower(ch);
1741             UChar32 hi = u_toupper(ch);
1742
1743             if (lo != hi) {
1744                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType));
1745                 return;
1746             }
1747         }
1748
1749         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityMaxCount, quantityType));
1750     }
1751
1752     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1753     {
1754         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1755
1756         m_bodyDisjunction->terms.last().atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1757         m_bodyDisjunction->terms.last().atom.quantityType = quantityType;
1758         m_bodyDisjunction->terms.last().frameLocation = frameLocation;
1759     }
1760
1761     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1762     {
1763         ASSERT(subpatternId);
1764
1765         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1766
1767         m_bodyDisjunction->terms.last().atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1768         m_bodyDisjunction->terms.last().atom.quantityType = quantityType;
1769         m_bodyDisjunction->terms.last().frameLocation = frameLocation;
1770     }
1771
1772     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1773     {
1774         unsigned beginTerm = m_bodyDisjunction->terms.size();
1775
1776         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1777         m_bodyDisjunction->terms.last().frameLocation = frameLocation;
1778         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1779         m_bodyDisjunction->terms.last().frameLocation = alternativeFrameLocation;
1780
1781         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1782         m_currentAlternativeIndex = beginTerm + 1;
1783     }
1784
1785     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1786     {
1787         unsigned beginTerm = m_bodyDisjunction->terms.size();
1788
1789         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1790         m_bodyDisjunction->terms.last().frameLocation = frameLocation;
1791         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1792         m_bodyDisjunction->terms.last().frameLocation = alternativeFrameLocation;
1793
1794         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1795         m_currentAlternativeIndex = beginTerm + 1;
1796     }
1797
1798     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1799     {
1800         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1801         // then fix this up at the end! - simplifying this should make it much clearer.
1802         // https://bugs.webkit.org/show_bug.cgi?id=50136
1803
1804         unsigned beginTerm = m_bodyDisjunction->terms.size();
1805
1806         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1807         m_bodyDisjunction->terms.last().frameLocation = frameLocation;
1808         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1809         m_bodyDisjunction->terms.last().frameLocation = alternativeFrameLocation;
1810
1811         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1812         m_currentAlternativeIndex = beginTerm + 1;
1813     }
1814
1815     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1816     {
1817         unsigned beginTerm = m_bodyDisjunction->terms.size();
1818
1819         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1820         m_bodyDisjunction->terms.last().frameLocation = frameLocation;
1821         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1822         m_bodyDisjunction->terms.last().frameLocation = alternativeFrameLocation;
1823
1824         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1825         m_currentAlternativeIndex = beginTerm + 1;
1826     }
1827
1828     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1829     {
1830         unsigned beginTerm = popParenthesesStack();
1831         closeAlternative(beginTerm + 1);
1832         unsigned endTerm = m_bodyDisjunction->terms.size();
1833
1834         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1835
1836         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1837         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1838
1839         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1840         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1841         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1842         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1843
1844         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1845         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1846         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1847         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1848     }
1849
1850     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1851     {
1852         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1853     }
1854
1855     unsigned popParenthesesStack()
1856     {
1857         ASSERT(m_parenthesesStack.size());
1858         unsigned beginTerm = m_parenthesesStack.last().beginTerm;
1859         m_currentAlternativeIndex = m_parenthesesStack.last().savedAlternativeIndex;
1860         m_parenthesesStack.removeLast();
1861
1862         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1863         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1864
1865         return beginTerm;
1866     }
1867
1868     void closeAlternative(unsigned beginTerm)
1869     {
1870         unsigned origBeginTerm = beginTerm;
1871         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1872         unsigned endIndex = m_bodyDisjunction->terms.size();
1873
1874         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1875
1876         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1877             m_bodyDisjunction->terms.remove(beginTerm);
1878         else {
1879             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1880                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1881                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1882                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1883                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1884             }
1885
1886             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1887
1888             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1889             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1890         }
1891     }
1892
1893     void closeBodyAlternative()
1894     {
1895         unsigned beginTerm = 0;
1896         unsigned origBeginTerm = 0;
1897         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1898         unsigned endIndex = m_bodyDisjunction->terms.size();
1899
1900         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1901
1902         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1903             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1904             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1905             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1906             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1907         }
1908
1909         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1910
1911         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1912         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1913     }
1914
1915     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1916     {
1917         unsigned beginTerm = popParenthesesStack();
1918         closeAlternative(beginTerm + 1);
1919         unsigned endTerm = m_bodyDisjunction->terms.size();
1920
1921         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1922
1923         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1924
1925         bool capture = parenthesesBegin.capture();
1926         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1927
1928         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1929         auto parenthesesDisjunction = makeUnique<ByteDisjunction>(numSubpatterns, callFrameSize);
1930
1931         unsigned firstTermInParentheses = beginTerm + 1;
1932         parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
1933
1934         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1935         for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1936             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1937         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1938
1939         m_bodyDisjunction->terms.shrink(beginTerm);
1940
1941         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1942         m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
1943
1944         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1945         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1946         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1947         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1948     }
1949
1950     void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1951     {
1952         unsigned beginTerm = popParenthesesStack();
1953         closeAlternative(beginTerm + 1);
1954         unsigned endTerm = m_bodyDisjunction->terms.size();
1955
1956         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1957
1958         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1959         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1960
1961         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1962         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1963         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1964         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1965
1966         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1967         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1968         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1969         m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1970         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1971         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1972     }
1973
1974     void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1975     {
1976         unsigned beginTerm = popParenthesesStack();
1977         closeAlternative(beginTerm + 1);
1978         unsigned endTerm = m_bodyDisjunction->terms.size();
1979
1980         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1981
1982         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1983         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1984
1985         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1986         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1987         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1988         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1989
1990         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1991         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1992         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1993         m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1994         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1995         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1996     }
1997
1998     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1999     {
2000         m_bodyDisjunction = makeUnique<ByteDisjunction>(numSubpatterns, callFrameSize);
2001         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
2002         m_bodyDisjunction->terms[0].frameLocation = 0;
2003         m_currentAlternativeIndex = 0;
2004     }
2005
2006     void regexEnd()
2007     {
2008         closeBodyAlternative();
2009     }
2010
2011     void alternativeBodyDisjunction(bool onceThrough)
2012     {
2013         unsigned newAlternativeIndex = m_bodyDisjunction->terms.size();
2014         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2015         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
2016
2017         m_currentAlternativeIndex = newAlternativeIndex;
2018     }
2019
2020     void alternativeDisjunction()
2021     {
2022         unsigned newAlternativeIndex = m_bodyDisjunction->terms.size();
2023         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2024         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
2025
2026         m_currentAlternativeIndex = newAlternativeIndex;
2027     }
2028
2029     Optional<ErrorCode> WARN_UNUSED_RETURN emitDisjunction(PatternDisjunction* disjunction, Checked<unsigned, RecordOverflow> inputCountAlreadyChecked, unsigned parenthesesInputCountAlreadyChecked)
2030     {
2031         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
2032             auto currentCountAlreadyChecked = inputCountAlreadyChecked;
2033
2034             PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
2035
2036             if (alt) {
2037                 if (disjunction == m_pattern.m_body)
2038                     alternativeBodyDisjunction(alternative->onceThrough());
2039                 else
2040                     alternativeDisjunction();
2041             }
2042
2043             unsigned minimumSize = alternative->m_minimumSize;
2044             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
2045             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
2046
2047             if (countToCheck) {
2048                 checkInput(countToCheck);
2049                 currentCountAlreadyChecked += countToCheck;
2050                 if (currentCountAlreadyChecked.hasOverflowed())
2051                     return ErrorCode::OffsetTooLarge;
2052             }
2053
2054             for (auto& term : alternative->m_terms) {
2055                 switch (term.type) {
2056                 case PatternTerm::TypeAssertionBOL:
2057                     assertionBOL((currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2058                     break;
2059
2060                 case PatternTerm::TypeAssertionEOL:
2061                     assertionEOL((currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2062                     break;
2063
2064                 case PatternTerm::TypeAssertionWordBoundary:
2065                     assertionWordBoundary(term.invert(), (currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2066                     break;
2067
2068                 case PatternTerm::TypePatternCharacter:
2069                     atomPatternCharacter(term.patternCharacter, (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2070                     break;
2071
2072                 case PatternTerm::TypeCharacterClass:
2073                     atomCharacterClass(term.characterClass, term.invert(), (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2074                     break;
2075
2076                 case PatternTerm::TypeBackReference:
2077                     atomBackReference(term.backReferenceSubpatternId, (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2078                     break;
2079
2080                 case PatternTerm::TypeForwardReference:
2081                     break;
2082
2083                 case PatternTerm::TypeParenthesesSubpattern: {
2084                     unsigned disjunctionAlreadyCheckedCount = 0;
2085                     if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) {
2086                         unsigned alternativeFrameLocation = term.frameLocation;
2087                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
2088                         if (term.quantityType == QuantifierFixedCount)
2089                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
2090                         else
2091                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2092                         unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2093                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
2094                         if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount))
2095                             return error;
2096                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2097                     } else if (term.parentheses.isTerminal) {
2098                         unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2099                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesTerminal);
2100                         if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount))
2101                             return error;
2102                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2103                     } else {
2104                         unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2105                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, 0);
2106                         if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0))
2107                             return error;
2108                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
2109                     }
2110                     break;
2111                 }
2112
2113                 case PatternTerm::TypeParentheticalAssertion: {
2114                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
2115                     unsigned positiveInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2116                     unsigned uncheckAmount = 0;
2117                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
2118                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
2119                         uncheckInput(uncheckAmount);
2120                         currentCountAlreadyChecked -= uncheckAmount;
2121                         if (currentCountAlreadyChecked.hasOverflowed())
2122                             return ErrorCode::OffsetTooLarge;
2123                     }
2124
2125                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
2126                     if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount))
2127                         return error;
2128                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityMaxCount, term.quantityType);
2129                     if (uncheckAmount) {
2130                         checkInput(uncheckAmount);
2131                         currentCountAlreadyChecked += uncheckAmount;
2132                         if (currentCountAlreadyChecked.hasOverflowed())
2133                             return ErrorCode::OffsetTooLarge;
2134                     }
2135                     break;
2136                 }
2137
2138                 case PatternTerm::TypeDotStarEnclosure:
2139                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
2140                     break;
2141                 }
2142             }
2143         }
2144         return WTF::nullopt;
2145     }
2146 #ifndef NDEBUG
2147     void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = 0)
2148     {
2149         PrintStream& out = WTF::dataFile();
2150
2151         unsigned termIndexNest = 0;
2152
2153         if (!nesting) {
2154             out.printf("ByteDisjunction(%p):\n", disjunction);
2155             nesting = 1;
2156         } else {
2157             termIndexNest = nesting - 1;
2158             nesting = 2;
2159         }
2160
2161         auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) {
2162             for (unsigned nestingDepth = 0; nestingDepth < termIndexNest; nestingDepth++)
2163                 out.print("  ");
2164             out.printf("%4zu", index);
2165             for (unsigned nestingDepth = 0; nestingDepth < termNesting; nestingDepth++)
2166                 out.print("  ");
2167         };
2168
2169         auto dumpQuantity = [&](ByteTerm& term) {
2170             if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == 1 && term.atom.quantityMaxCount == 1)
2171                 return;
2172
2173             out.print(" {", term.atom.quantityMinCount);
2174             if (term.atom.quantityMinCount != term.atom.quantityMaxCount) {
2175                 if (term.atom.quantityMaxCount == UINT_MAX)
2176                     out.print(",inf");
2177                 else
2178                     out.print(",", term.atom.quantityMaxCount);
2179             }
2180             out.print("}");
2181             if (term.atom.quantityType == QuantifierGreedy)
2182                 out.print(" greedy");
2183             else if (term.atom.quantityType == QuantifierNonGreedy)
2184                 out.print(" non-greedy");
2185         };
2186
2187         auto dumpCaptured = [&](ByteTerm& term) {
2188             if (term.capture())
2189                 out.print(" captured (#", term.atom.subpatternId, ")");
2190         };
2191
2192         auto dumpInverted = [&](ByteTerm& term) {
2193             if (term.invert())
2194                 out.print(" inverted");
2195         };
2196
2197         auto dumpInputPosition = [&](ByteTerm& term) {
2198             out.printf(" inputPosition %u", term.inputPosition);
2199         };
2200
2201         auto dumpFrameLocation = [&](ByteTerm& term) {
2202             out.printf(" frameLocation %u", term.frameLocation);
2203         };
2204         
2205         auto dumpCharacter = [&](ByteTerm& term) {
2206             out.print(" ");
2207             dumpUChar32(out, term.atom.patternCharacter);
2208         };
2209
2210         auto dumpCharClass = [&](ByteTerm& term) {
2211             out.print(" ");
2212             dumpCharacterClass(out, &m_pattern, term.atom.characterClass);
2213         };
2214
2215         for (size_t idx = 0; idx < disjunction->terms.size(); ++idx) {
2216             ByteTerm term = disjunction->terms[idx];
2217
2218             bool outputNewline = true;
2219
2220             switch (term.type) {
2221             case ByteTerm::TypeBodyAlternativeBegin:
2222                 outputTermIndexAndNest(idx, nesting++);
2223                 out.print("BodyAlternativeBegin");
2224                 if (term.alternative.onceThrough)
2225                     out.print(" onceThrough");
2226                 dumpFrameLocation(term);
2227                 break;
2228             case ByteTerm::TypeBodyAlternativeDisjunction:
2229                 outputTermIndexAndNest(idx, nesting - 1);
2230                 out.print("BodyAlternativeDisjunction");
2231                 dumpFrameLocation(term);
2232                 break;
2233             case ByteTerm::TypeBodyAlternativeEnd:
2234                 outputTermIndexAndNest(idx, --nesting);
2235                 out.print("BodyAlternativeEnd");
2236                 dumpFrameLocation(term);
2237                 break;
2238             case ByteTerm::TypeAlternativeBegin:
2239                 outputTermIndexAndNest(idx, nesting++);
2240                 out.print("AlternativeBegin");
2241                 dumpFrameLocation(term);
2242                 break;
2243             case ByteTerm::TypeAlternativeDisjunction:
2244                 outputTermIndexAndNest(idx, nesting - 1);
2245                 out.print("AlternativeDisjunction");
2246                 dumpFrameLocation(term);
2247                 break;
2248             case ByteTerm::TypeAlternativeEnd:
2249                 outputTermIndexAndNest(idx, --nesting);
2250                 out.print("AlternativeEnd");
2251                 dumpFrameLocation(term);
2252                 break;
2253             case ByteTerm::TypeSubpatternBegin:
2254                 outputTermIndexAndNest(idx, nesting++);
2255                 out.print("SubpatternBegin");
2256                 break;
2257             case ByteTerm::TypeSubpatternEnd:
2258                 outputTermIndexAndNest(idx, --nesting);
2259                 out.print("SubpatternEnd");
2260                 break;
2261             case ByteTerm::TypeAssertionBOL:
2262                 outputTermIndexAndNest(idx, nesting);
2263                 out.print("AssertionBOL");
2264                 break;
2265             case ByteTerm::TypeAssertionEOL:
2266                 outputTermIndexAndNest(idx, nesting);
2267                 out.print("AssertionEOL");
2268                 break;
2269             case ByteTerm::TypeAssertionWordBoundary:
2270                 outputTermIndexAndNest(idx, nesting);
2271                 out.print("AssertionWordBoundary");
2272                 break;
2273             case ByteTerm::TypePatternCharacterOnce:
2274                 outputTermIndexAndNest(idx, nesting);
2275                 out.print("PatternCharacterOnce");
2276                 dumpInverted(term);
2277                 dumpInputPosition(term);
2278                 dumpFrameLocation(term);
2279                 dumpCharacter(term);
2280                 dumpQuantity(term);
2281                 break;
2282             case ByteTerm::TypePatternCharacterFixed:
2283                 outputTermIndexAndNest(idx, nesting);
2284                 out.print("PatternCharacterFixed");
2285                 dumpInverted(term);
2286                 dumpInputPosition(term);
2287                 dumpFrameLocation(term);
2288                 dumpCharacter(term);
2289                 out.print(" {", term.atom.quantityMinCount, "}");
2290                 break;
2291             case ByteTerm::TypePatternCharacterGreedy:
2292                 outputTermIndexAndNest(idx, nesting);
2293                 out.print("PatternCharacterGreedy");
2294                 dumpInverted(term);
2295                 dumpInputPosition(term);
2296                 dumpFrameLocation(term);
2297                 dumpCharacter(term);
2298                 dumpQuantity(term);
2299                 break;
2300             case ByteTerm::TypePatternCharacterNonGreedy:
2301                 outputTermIndexAndNest(idx, nesting);
2302                 out.print("PatternCharacterNonGreedy");
2303                 dumpInverted(term);
2304                 dumpInputPosition(term);
2305                 dumpFrameLocation(term);
2306                 dumpCharacter(term);
2307                 dumpQuantity(term);
2308                 break;
2309             case ByteTerm::TypePatternCasedCharacterOnce:
2310                 outputTermIndexAndNest(idx, nesting);
2311                 out.print("PatternCasedCharacterOnce");
2312                 break;
2313             case ByteTerm::TypePatternCasedCharacterFixed:
2314                 outputTermIndexAndNest(idx, nesting);
2315                 out.print("PatternCasedCharacterFixed");
2316                 break;
2317             case ByteTerm::TypePatternCasedCharacterGreedy:
2318                 outputTermIndexAndNest(idx, nesting);
2319                 out.print("PatternCasedCharacterGreedy");
2320                 break;
2321             case ByteTerm::TypePatternCasedCharacterNonGreedy:
2322                 outputTermIndexAndNest(idx, nesting);
2323                 out.print("PatternCasedCharacterNonGreedy");
2324                 break;
2325             case ByteTerm::TypeCharacterClass:
2326                 outputTermIndexAndNest(idx, nesting);
2327                 out.print("CharacterClass");
2328                 dumpInverted(term);
2329                 dumpInputPosition(term);
2330                 dumpFrameLocation(term);
2331                 dumpCharClass(term);
2332                 dumpQuantity(term);
2333                 break;
2334             case ByteTerm::TypeBackReference:
2335                 outputTermIndexAndNest(idx, nesting);
2336                 out.print("BackReference #", term.atom.subpatternId);
2337                 dumpQuantity(term);
2338                 break;
2339             case ByteTerm::TypeParenthesesSubpattern:
2340                 outputTermIndexAndNest(idx, nesting);
2341                 out.print("ParenthesesSubpattern");
2342                 dumpCaptured(term);
2343                 dumpInverted(term);
2344                 dumpInputPosition(term);
2345                 dumpFrameLocation(term);
2346                 dumpQuantity(term);
2347                 out.print("\n");
2348                 outputNewline = false;
2349                 dumpDisjunction(term.atom.parenthesesDisjunction, nesting);
2350                 break;
2351             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
2352                 outputTermIndexAndNest(idx, nesting++);
2353                 out.print("ParenthesesSubpatternOnceBegin");
2354                 dumpCaptured(term);
2355                 dumpInverted(term);
2356                 dumpInputPosition(term);
2357                 dumpFrameLocation(term);
2358                 break;
2359             case ByteTerm::TypeParenthesesSubpatternOnceEnd:
2360                 outputTermIndexAndNest(idx, --nesting);
2361                 out.print("ParenthesesSubpatternOnceEnd");
2362                 dumpFrameLocation(term);
2363                 break;
2364             case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
2365                 outputTermIndexAndNest(idx, nesting++);
2366                 out.print("ParenthesesSubpatternTerminalBegin");
2367                 dumpInverted(term);
2368                 dumpInputPosition(term);
2369                 dumpFrameLocation(term);
2370                 break;
2371             case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
2372                 outputTermIndexAndNest(idx, --nesting);
2373                 out.print("ParenthesesSubpatternTerminalEnd");
2374                 dumpFrameLocation(term);
2375                 break;
2376             case ByteTerm::TypeParentheticalAssertionBegin:
2377                 outputTermIndexAndNest(idx, nesting++);
2378                 out.print("ParentheticalAssertionBegin");
2379                 dumpInverted(term);
2380                 dumpInputPosition(term);
2381                 dumpFrameLocation(term);
2382                 break;
2383             case ByteTerm::TypeParentheticalAssertionEnd:
2384                 outputTermIndexAndNest(idx, --nesting);
2385                 out.print("ParentheticalAssertionEnd");
2386                 dumpFrameLocation(term);
2387                 break;
2388             case ByteTerm::TypeCheckInput:
2389                 outputTermIndexAndNest(idx, nesting);
2390                 out.print("CheckInput ", term.checkInputCount);
2391                 break;
2392             case ByteTerm::TypeUncheckInput:
2393                 outputTermIndexAndNest(idx, nesting);
2394                 out.print("UncheckInput ", term.checkInputCount);
2395                 break;
2396             case ByteTerm::TypeDotStarEnclosure:
2397                 outputTermIndexAndNest(idx, nesting);
2398                 out.print("DotStarEnclosure");
2399                 break;
2400             }
2401             if (outputNewline)
2402                 out.print("\n");
2403         }
2404     }
2405 #endif
2406
2407 private:
2408     YarrPattern& m_pattern;
2409     std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
2410     unsigned m_currentAlternativeIndex { 0 };
2411     Vector<ParenthesesStackEntry> m_parenthesesStack;
2412     Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
2413 };
2414
2415 std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ErrorCode& errorCode, ConcurrentJSLock* lock)
2416 {
2417     return ByteCompiler(pattern).compile(allocator, lock, errorCode);
2418 }
2419
2420 unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
2421 {
2422     SuperSamplerScope superSamplerScope(false);
2423     if (input.is8Bit())
2424         return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
2425     return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
2426 }
2427
2428 unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
2429 {
2430     SuperSamplerScope superSamplerScope(false);
2431     return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
2432 }
2433
2434 unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
2435 {
2436     SuperSamplerScope superSamplerScope(false);
2437     return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
2438 }
2439
2440 // These should be the same for both UChar & LChar.
2441 COMPILE_ASSERT(sizeof(BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
2442 COMPILE_ASSERT(sizeof(BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
2443 COMPILE_ASSERT(sizeof(BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
2444 COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
2445 COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
2446 COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
2447 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
2448
2449
2450 } }