REGRESSION (r225695): Repro crash on yahoo login page
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrInterpreter.cpp
1 /*
2  * Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "YarrInterpreter.h"
29
30 #include "Options.h"
31 #include "SuperSampler.h"
32 #include "Yarr.h"
33 #include "YarrCanonicalize.h"
34 #include <wtf/BumpPointerAllocator.h>
35 #include <wtf/DataLog.h>
36 #include <wtf/text/CString.h>
37 #include <wtf/text/WTFString.h>
38
39 using namespace WTF;
40
41 namespace JSC { namespace Yarr {
42
43 template<typename CharType>
44 class Interpreter {
45 public:
46     struct ParenthesesDisjunctionContext;
47
48     struct BackTrackInfoParentheses {
49         uintptr_t matchAmount;
50         ParenthesesDisjunctionContext* lastContext;
51     };
52
53     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
54     {
55         context->next = backTrack->lastContext;
56         backTrack->lastContext = context;
57         ++backTrack->matchAmount;
58     }
59
60     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
61     {
62         RELEASE_ASSERT(backTrack->matchAmount);
63         RELEASE_ASSERT(backTrack->lastContext);
64         backTrack->lastContext = backTrack->lastContext->next;
65         --backTrack->matchAmount;
66     }
67
68     struct DisjunctionContext
69     {
70         DisjunctionContext()
71             : term(0)
72         {
73         }
74
75         void* operator new(size_t, void* where)
76         {
77             return where;
78         }
79
80         int term;
81         unsigned matchBegin;
82         unsigned matchEnd;
83         uintptr_t frame[1];
84     };
85
86     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
87     {
88         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
89         allocatorPool = allocatorPool->ensureCapacity(size);
90         RELEASE_ASSERT(allocatorPool);
91         return new (allocatorPool->alloc(size)) DisjunctionContext();
92     }
93
94     void freeDisjunctionContext(DisjunctionContext* context)
95     {
96         allocatorPool = allocatorPool->dealloc(context);
97     }
98
99     struct ParenthesesDisjunctionContext
100     {
101         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
102             : next(0)
103         {
104             unsigned firstSubpatternId = term.atom.subpatternId;
105             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
106
107             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
108                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
109                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
110             }
111
112             new (getDisjunctionContext(term)) DisjunctionContext();
113         }
114
115         void* operator new(size_t, void* where)
116         {
117             return where;
118         }
119
120         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
121         {
122             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
123                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
124         }
125
126         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
127         {
128             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
129         }
130
131         ParenthesesDisjunctionContext* next;
132         unsigned subpatternBackup[1];
133     };
134
135     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
136     {
137         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t);
138         allocatorPool = allocatorPool->ensureCapacity(size);
139         RELEASE_ASSERT(allocatorPool);
140         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
141     }
142
143     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
144     {
145         allocatorPool = allocatorPool->dealloc(context);
146     }
147
148     class InputStream {
149     public:
150         InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
151             : input(input)
152             , pos(start)
153             , length(length)
154             , decodeSurrogatePairs(decodeSurrogatePairs)
155         {
156         }
157
158         void next()
159         {
160             ++pos;
161         }
162
163         void rewind(unsigned amount)
164         {
165             ASSERT(pos >= amount);
166             pos -= amount;
167         }
168
169         int read()
170         {
171             ASSERT(pos < length);
172             if (pos < length)
173                 return input[pos];
174             return -1;
175         }
176
177         int readPair()
178         {
179             ASSERT(pos + 1 < length);
180             return input[pos] | input[pos + 1] << 16;
181         }
182
183         int readChecked(unsigned negativePositionOffest)
184         {
185             RELEASE_ASSERT(pos >= negativePositionOffest);
186             unsigned p = pos - negativePositionOffest;
187             ASSERT(p < length);
188             int result = input[p];
189             if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
190                 if (atEnd())
191                     return -1;
192                 
193                 result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
194                 next();
195             }
196             return result;
197         }
198         
199         int readSurrogatePairChecked(unsigned negativePositionOffset)
200         {
201             RELEASE_ASSERT(pos >= negativePositionOffset);
202             unsigned p = pos - negativePositionOffset;
203             ASSERT(p < length);
204             if (p + 1 >= length)
205                 return -1;
206
207             int first = input[p];
208             int second = input[p + 1];
209             if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
210                 return U16_GET_SUPPLEMENTARY(first, second);
211
212             return -1;
213         }
214
215         int reread(unsigned from)
216         {
217             ASSERT(from < length);
218             int result = input[from];
219             if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1]))
220                 result = U16_GET_SUPPLEMENTARY(result, input[from + 1]);
221             return result;
222         }
223
224         int prev()
225         {
226             ASSERT(!(pos > length));
227             if (pos && length)
228                 return input[pos - 1];
229             return -1;
230         }
231
232         unsigned getPos()
233         {
234             return pos;
235         }
236
237         void setPos(unsigned p)
238         {
239             pos = p;
240         }
241
242         bool atStart()
243         {
244             return pos == 0;
245         }
246
247         bool atEnd()
248         {
249             return pos == length;
250         }
251
252         unsigned end()
253         {
254             return length;
255         }
256
257         bool checkInput(unsigned count)
258         {
259             if (((pos + count) <= length) && ((pos + count) >= pos)) {
260                 pos += count;
261                 return true;
262             }
263             return false;
264         }
265
266         void uncheckInput(unsigned count)
267         {
268             RELEASE_ASSERT(pos >= count);
269             pos -= count;
270         }
271
272         bool atStart(unsigned negativePositionOffset)
273         {
274             return pos == negativePositionOffset;
275         }
276
277         bool atEnd(unsigned negativePositionOffest)
278         {
279             RELEASE_ASSERT(pos >= negativePositionOffest);
280             return (pos - negativePositionOffest) == length;
281         }
282
283         bool isAvailableInput(unsigned offset)
284         {
285             return (((pos + offset) <= length) && ((pos + offset) >= pos));
286         }
287
288     private:
289         const CharType* input;
290         unsigned pos;
291         unsigned length;
292         bool decodeSurrogatePairs;
293     };
294
295     bool testCharacterClass(CharacterClass* characterClass, int ch)
296     {
297         auto linearSearchMatches = [&ch](const Vector<UChar32>& matches) {
298             for (unsigned i = 0; i < matches.size(); ++i) {
299                 if (ch == matches[i])
300                     return true;
301             }
302
303             return false;
304         };
305
306         auto binarySearchMatches = [&ch](const Vector<UChar32>& matches) {
307             size_t low = 0;
308             size_t high = matches.size() - 1;
309
310             while (low <= high) {
311                 size_t mid = low + (high - low) / 2;
312                 int diff = ch - matches[mid];
313                 if (!diff)
314                     return true;
315
316                 if (diff < 0) {
317                     if (mid == low)
318                         return false;
319                     high = mid - 1;
320                 } else
321                     low = mid + 1;
322             }
323             return false;
324         };
325
326         auto linearSearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
327             for (unsigned i = 0; i < ranges.size(); ++i) {
328                 if ((ch >= ranges[i].begin) && (ch <= ranges[i].end))
329                     return true;
330             }
331
332             return false;
333         };
334
335         auto binarySearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
336             size_t low = 0;
337             size_t high = ranges.size() - 1;
338
339             while (low <= high) {
340                 size_t mid = low + (high - low) / 2;
341                 int rangeBeginDiff = ch - ranges[mid].begin;
342                 if (rangeBeginDiff >= 0 && ch <= ranges[mid].end)
343                     return true;
344
345                 if (rangeBeginDiff < 0) {
346                     if (mid == low)
347                         return false;
348                     high = mid - 1;
349                 } else
350                     low = mid + 1;
351             }
352             return false;
353         };
354
355         if (characterClass->m_anyCharacter)
356             return true;
357
358         const size_t thresholdForBinarySearch = 6;
359
360         if (!isASCII(ch)) {
361             if (characterClass->m_matchesUnicode.size()) {
362                 if (characterClass->m_matchesUnicode.size() > thresholdForBinarySearch) {
363                     if (binarySearchMatches(characterClass->m_matchesUnicode))
364                         return true;
365                 } else if (linearSearchMatches(characterClass->m_matchesUnicode))
366                     return true;
367             }
368
369             if (characterClass->m_rangesUnicode.size()) {
370                 if (characterClass->m_rangesUnicode.size() > thresholdForBinarySearch) {
371                     if (binarySearchRanges(characterClass->m_rangesUnicode))
372                         return true;
373                 } else if (linearSearchRanges(characterClass->m_rangesUnicode))
374                     return true;
375             }
376         } else {
377             if (characterClass->m_matches.size()) {
378                 if (characterClass->m_matches.size() > thresholdForBinarySearch) {
379                     if (binarySearchMatches(characterClass->m_matches))
380                         return true;
381                 } else if (linearSearchMatches(characterClass->m_matches))
382                     return true;
383             }
384
385             if (characterClass->m_ranges.size()) {
386                 if (characterClass->m_ranges.size() > thresholdForBinarySearch) {
387                     if (binarySearchRanges(characterClass->m_ranges))
388                         return true;
389                 } else if (linearSearchRanges(characterClass->m_ranges))
390                     return true;
391             }
392         }
393
394         return false;
395     }
396
397     bool checkCharacter(int testChar, unsigned negativeInputOffset)
398     {
399         return testChar == input.readChecked(negativeInputOffset);
400     }
401
402     bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
403     {
404         return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
405     }
406
407     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
408     {
409         int ch = input.readChecked(negativeInputOffset);
410         return (loChar == ch) || (hiChar == ch);
411     }
412
413     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
414     {
415         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
416         return invert ? !match : match;
417     }
418
419     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
420     {
421         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
422
423         if (!input.checkInput(matchSize))
424             return false;
425
426         for (unsigned i = 0; i < matchSize; ++i) {
427             int oldCh = input.reread(matchBegin + i);
428             int ch;
429             if (!U_IS_BMP(oldCh)) {
430                 ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
431                 ++i;
432             } else
433                 ch = input.readChecked(negativeInputOffset + matchSize - i);
434
435             if (oldCh == ch)
436                 continue;
437
438             if (pattern->ignoreCase()) {
439                 // See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
440                 // patterns, Unicode values are never allowed to match against ASCII ones.
441                 // For Unicode, we need to check all canonical equivalents of a character.
442                 if (!unicode && (isASCII(oldCh) || isASCII(ch))) {
443                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
444                         continue;
445                 } else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
446                     continue;
447             }
448
449             input.uncheckInput(matchSize);
450             return false;
451         }
452
453         return true;
454     }
455
456     bool matchAssertionBOL(ByteTerm& term)
457     {
458         return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
459     }
460
461     bool matchAssertionEOL(ByteTerm& term)
462     {
463         if (term.inputPosition)
464             return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
465
466         return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
467     }
468
469     bool matchAssertionWordBoundary(ByteTerm& term)
470     {
471         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
472         bool readIsWordchar;
473         if (term.inputPosition)
474             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
475         else
476             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
477
478         bool wordBoundary = prevIsWordchar != readIsWordchar;
479         return term.invert() ? !wordBoundary : wordBoundary;
480     }
481
482     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
483     {
484         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
485
486         switch (term.atom.quantityType) {
487         case QuantifierFixedCount:
488             break;
489
490         case QuantifierGreedy:
491             if (backTrack->matchAmount) {
492                 --backTrack->matchAmount;
493                 input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
494                 return true;
495             }
496             break;
497
498         case QuantifierNonGreedy:
499             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
500                 ++backTrack->matchAmount;
501                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
502                     return true;
503             }
504             input.setPos(backTrack->begin);
505             break;
506         }
507
508         return false;
509     }
510
511     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
512     {
513         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
514
515         switch (term.atom.quantityType) {
516         case QuantifierFixedCount:
517             break;
518
519         case QuantifierGreedy:
520             if (backTrack->matchAmount) {
521                 --backTrack->matchAmount;
522                 input.uncheckInput(1);
523                 return true;
524             }
525             break;
526
527         case QuantifierNonGreedy:
528             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
529                 ++backTrack->matchAmount;
530                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
531                     return true;
532             }
533             input.uncheckInput(backTrack->matchAmount);
534             break;
535         }
536
537         return false;
538     }
539
540     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
541     {
542         ASSERT(term.type == ByteTerm::TypeCharacterClass);
543         BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
544
545         switch (term.atom.quantityType) {
546         case QuantifierFixedCount: {
547             if (unicode) {
548                 backTrack->begin = input.getPos();
549                 unsigned matchAmount = 0;
550                 for (matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
551                     if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) {
552                         input.setPos(backTrack->begin);
553                         return false;
554                     }
555                 }
556
557                 return true;
558             }
559
560             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
561                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
562                     return false;
563             }
564             return true;
565         }
566
567         case QuantifierGreedy: {
568             unsigned position = input.getPos();
569             backTrack->begin = position;
570             unsigned matchAmount = 0;
571             while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
572                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
573                     input.setPos(position);
574                     break;
575                 }
576                 ++matchAmount;
577                 position = input.getPos();
578             }
579             backTrack->matchAmount = matchAmount;
580
581             return true;
582         }
583
584         case QuantifierNonGreedy:
585             backTrack->begin = input.getPos();
586             backTrack->matchAmount = 0;
587             return true;
588         }
589
590         RELEASE_ASSERT_NOT_REACHED();
591         return false;
592     }
593
594     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
595     {
596         ASSERT(term.type == ByteTerm::TypeCharacterClass);
597         BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
598
599         switch (term.atom.quantityType) {
600         case QuantifierFixedCount:
601             if (unicode)
602                 input.setPos(backTrack->begin);
603             break;
604
605         case QuantifierGreedy:
606             if (backTrack->matchAmount) {
607                 if (unicode) {
608                     // Rematch one less match
609                     input.setPos(backTrack->begin);
610                     --backTrack->matchAmount;
611                     for (unsigned matchAmount = 0; (matchAmount < backTrack->matchAmount) && input.checkInput(1); ++matchAmount) {
612                         if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
613                             input.uncheckInput(1);
614                             break;
615                         }
616                     }
617                     return true;
618                 }
619                 --backTrack->matchAmount;
620                 input.uncheckInput(1);
621                 return true;
622             }
623             break;
624
625         case QuantifierNonGreedy:
626             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
627                 ++backTrack->matchAmount;
628                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
629                     return true;
630             }
631             input.setPos(backTrack->begin);
632             break;
633         }
634
635         return false;
636     }
637
638     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
639     {
640         ASSERT(term.type == ByteTerm::TypeBackReference);
641         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
642
643         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
644         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
645
646         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
647         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
648         // Eg.: /(a\1)/
649         if (matchEnd == offsetNoMatch)
650             return true;
651
652         if (matchBegin == offsetNoMatch)
653             return true;
654
655         ASSERT(matchBegin <= matchEnd);
656
657         if (matchBegin == matchEnd)
658             return true;
659
660         switch (term.atom.quantityType) {
661         case QuantifierFixedCount: {
662             backTrack->begin = input.getPos();
663             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
664                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
665                     input.setPos(backTrack->begin);
666                     return false;
667                 }
668             }
669             return true;
670         }
671
672         case QuantifierGreedy: {
673             unsigned matchAmount = 0;
674             while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
675                 ++matchAmount;
676             backTrack->matchAmount = matchAmount;
677             return true;
678         }
679
680         case QuantifierNonGreedy:
681             backTrack->begin = input.getPos();
682             backTrack->matchAmount = 0;
683             return true;
684         }
685
686         RELEASE_ASSERT_NOT_REACHED();
687         return false;
688     }
689
690     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
691     {
692         ASSERT(term.type == ByteTerm::TypeBackReference);
693         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
694
695         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
696         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
697
698         if (matchBegin == offsetNoMatch)
699             return false;
700
701         ASSERT(matchBegin <= matchEnd);
702
703         if (matchBegin == matchEnd)
704             return false;
705
706         switch (term.atom.quantityType) {
707         case QuantifierFixedCount:
708             // for quantityMaxCount == 1, could rewind.
709             input.setPos(backTrack->begin);
710             break;
711
712         case QuantifierGreedy:
713             if (backTrack->matchAmount) {
714                 --backTrack->matchAmount;
715                 input.rewind(matchEnd - matchBegin);
716                 return true;
717             }
718             break;
719
720         case QuantifierNonGreedy:
721             if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
722                 ++backTrack->matchAmount;
723                 return true;
724             }
725             input.setPos(backTrack->begin);
726             break;
727         }
728
729         return false;
730     }
731
732     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
733     {
734         if (term.capture()) {
735             unsigned subpatternId = term.atom.subpatternId;
736             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition;
737             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition;
738         }
739     }
740     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
741     {
742         unsigned firstSubpatternId = term.atom.subpatternId;
743         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
744         context->restoreOutput(output, firstSubpatternId, count);
745     }
746     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
747     {
748         while (backTrack->matchAmount) {
749             ParenthesesDisjunctionContext* context = backTrack->lastContext;
750
751             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
752             if (result == JSRegExpMatch)
753                 return JSRegExpMatch;
754
755             resetMatches(term, context);
756             popParenthesesDisjunctionContext(backTrack);
757             freeParenthesesDisjunctionContext(context);
758
759             if (result != JSRegExpNoMatch)
760                 return result;
761         }
762
763         return JSRegExpNoMatch;
764     }
765
766     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
767     {
768         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
769         ASSERT(term.atom.quantityMaxCount == 1);
770
771         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
772
773         switch (term.atom.quantityType) {
774         case QuantifierGreedy: {
775             // set this speculatively; if we get to the parens end this will be true.
776             backTrack->begin = input.getPos();
777             break;
778         }
779         case QuantifierNonGreedy: {
780             backTrack->begin = notFound;
781             context->term += term.atom.parenthesesWidth;
782             return true;
783         }
784         case QuantifierFixedCount:
785             break;
786         }
787
788         if (term.capture()) {
789             unsigned subpatternId = term.atom.subpatternId;
790             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
791         }
792
793         return true;
794     }
795
796     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
797     {
798         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
799         ASSERT(term.atom.quantityMaxCount == 1);
800
801         if (term.capture()) {
802             unsigned subpatternId = term.atom.subpatternId;
803             output[(subpatternId << 1) + 1] = input.getPos() - term.inputPosition;
804         }
805
806         if (term.atom.quantityType == QuantifierFixedCount)
807             return true;
808
809         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
810         return backTrack->begin != input.getPos();
811     }
812
813     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
814     {
815         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
816         ASSERT(term.atom.quantityMaxCount == 1);
817
818         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
819
820         if (term.capture()) {
821             unsigned subpatternId = term.atom.subpatternId;
822             output[(subpatternId << 1)] = offsetNoMatch;
823             output[(subpatternId << 1) + 1] = offsetNoMatch;
824         }
825
826         switch (term.atom.quantityType) {
827         case QuantifierGreedy:
828             // if we backtrack to this point, there is another chance - try matching nothing.
829             ASSERT(backTrack->begin != notFound);
830             backTrack->begin = notFound;
831             context->term += term.atom.parenthesesWidth;
832             return true;
833         case QuantifierNonGreedy:
834             ASSERT(backTrack->begin != notFound);
835             FALLTHROUGH;
836         case QuantifierFixedCount:
837             break;
838         }
839
840         return false;
841     }
842
843     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
844     {
845         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
846         ASSERT(term.atom.quantityMaxCount == 1);
847
848         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
849
850         switch (term.atom.quantityType) {
851         case QuantifierGreedy:
852             if (backTrack->begin == notFound) {
853                 context->term -= term.atom.parenthesesWidth;
854                 return false;
855             }
856             FALLTHROUGH;
857         case QuantifierNonGreedy:
858             if (backTrack->begin == notFound) {
859                 backTrack->begin = input.getPos();
860                 if (term.capture()) {
861                     // Technically this access to inputPosition should be accessing the begin term's
862                     // inputPosition, but for repeats other than fixed these values should be
863                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
864                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
865                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
866                     unsigned subpatternId = term.atom.subpatternId;
867                     output[subpatternId << 1] = input.getPos() - term.inputPosition;
868                 }
869                 context->term -= term.atom.parenthesesWidth;
870                 return true;
871             }
872             FALLTHROUGH;
873         case QuantifierFixedCount:
874             break;
875         }
876
877         return false;
878     }
879
880     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
881     {
882         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
883         ASSERT(term.atom.quantityType == QuantifierGreedy);
884         ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
885         ASSERT(!term.capture());
886
887         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
888         backTrack->begin = input.getPos();
889         return true;
890     }
891
892     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
893     {
894         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
895
896         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
897         // Empty match is a failed match.
898         if (backTrack->begin == input.getPos())
899             return false;
900
901         // Successful match! Okay, what's next? - loop around and try to match more!
902         context->term -= (term.atom.parenthesesWidth + 1);
903         return true;
904     }
905
906     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
907     {
908         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
909         ASSERT(term.atom.quantityType == QuantifierGreedy);
910         ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
911         ASSERT(!term.capture());
912
913         // If we backtrack to this point, we have failed to match this iteration of the parens.
914         // Since this is greedy / zero minimum a failed is also accepted as a match!
915         context->term += term.atom.parenthesesWidth;
916         return true;
917     }
918
919     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
920     {
921         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
922         // should always be returned as a successful match - we should never backtrack to here.
923         RELEASE_ASSERT_NOT_REACHED();
924         return false;
925     }
926
927     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
928     {
929         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
930         ASSERT(term.atom.quantityMaxCount == 1);
931
932         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
933
934         backTrack->begin = input.getPos();
935         return true;
936     }
937
938     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
939     {
940         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
941         ASSERT(term.atom.quantityMaxCount == 1);
942
943         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
944
945         input.setPos(backTrack->begin);
946
947         // We've reached the end of the parens; if they are inverted, this is failure.
948         if (term.invert()) {
949             context->term -= term.atom.parenthesesWidth;
950             return false;
951         }
952
953         return true;
954     }
955
956     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
957     {
958         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
959         ASSERT(term.atom.quantityMaxCount == 1);
960
961         // We've failed to match parens; if they are inverted, this is win!
962         if (term.invert()) {
963             context->term += term.atom.parenthesesWidth;
964             return true;
965         }
966
967         return false;
968     }
969
970     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
971     {
972         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
973         ASSERT(term.atom.quantityMaxCount == 1);
974
975         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
976
977         input.setPos(backTrack->begin);
978
979         context->term -= term.atom.parenthesesWidth;
980         return false;
981     }
982
983     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
984     {
985         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
986
987         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
988         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
989
990         backTrack->matchAmount = 0;
991         backTrack->lastContext = 0;
992
993         ASSERT(term.atom.quantityType != QuantifierFixedCount || term.atom.quantityMinCount == term.atom.quantityMaxCount);
994
995         unsigned minimumMatchCount = term.atom.quantityMinCount;
996         JSRegExpResult fixedMatchResult;
997
998         // Handle fixed matches and the minimum part of a variable length match.
999         if (minimumMatchCount) {
1000             // While we haven't yet reached our fixed limit,
1001             while (backTrack->matchAmount < minimumMatchCount) {
1002                 // Try to do a match, and it it succeeds, add it to the list.
1003                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1004                 fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1005                 if (fixedMatchResult == JSRegExpMatch)
1006                     appendParenthesesDisjunctionContext(backTrack, context);
1007                 else {
1008                     // The match failed; try to find an alternate point to carry on from.
1009                     resetMatches(term, context);
1010                     freeParenthesesDisjunctionContext(context);
1011                     
1012                     if (fixedMatchResult != JSRegExpNoMatch)
1013                         return fixedMatchResult;
1014                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1015                     if (backtrackResult != JSRegExpMatch)
1016                         return backtrackResult;
1017                 }
1018             }
1019
1020             ParenthesesDisjunctionContext* context = backTrack->lastContext;
1021             recordParenthesesMatch(term, context);
1022         }
1023
1024         switch (term.atom.quantityType) {
1025         case QuantifierFixedCount: {
1026             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1027             return JSRegExpMatch;
1028         }
1029
1030         case QuantifierGreedy: {
1031             while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1032                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1033                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1034                 if (result == JSRegExpMatch)
1035                     appendParenthesesDisjunctionContext(backTrack, context);
1036                 else {
1037                     resetMatches(term, context);
1038                     freeParenthesesDisjunctionContext(context);
1039
1040                     if (result != JSRegExpNoMatch)
1041                         return result;
1042
1043                     break;
1044                 }
1045             }
1046
1047             if (backTrack->matchAmount) {
1048                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1049                 recordParenthesesMatch(term, context);
1050             }
1051             return JSRegExpMatch;
1052         }
1053
1054         case QuantifierNonGreedy:
1055             return JSRegExpMatch;
1056         }
1057
1058         RELEASE_ASSERT_NOT_REACHED();
1059         return JSRegExpErrorNoMatch;
1060     }
1061
1062     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
1063     //
1064     // Greedy matches never should try just adding more - you should already have done
1065     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
1066     // you backtrack an item off the list needs checking, since we'll never have matched
1067     // the one less case.  Tracking forwards, still add as much as possible.
1068     //
1069     // Non-greedy, we've already done the one less case, so don't match on popping.
1070     // We haven't done the one more case, so always try to add that.
1071     //
1072     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
1073     {
1074         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1075
1076         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1077         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1078
1079         switch (term.atom.quantityType) {
1080         case QuantifierFixedCount: {
1081             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1082
1083             ParenthesesDisjunctionContext* context = 0;
1084             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
1085
1086             if (result != JSRegExpMatch)
1087                 return result;
1088
1089             // While we haven't yet reached our fixed limit,
1090             while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1091                 // Try to do a match, and it it succeeds, add it to the list.
1092                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1093                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1094
1095                 if (result == JSRegExpMatch)
1096                     appendParenthesesDisjunctionContext(backTrack, context);
1097                 else {
1098                     // The match failed; try to find an alternate point to carry on from.
1099                     resetMatches(term, context);
1100                     freeParenthesesDisjunctionContext(context);
1101
1102                     if (result != JSRegExpNoMatch)
1103                         return result;
1104                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1105                     if (backtrackResult != JSRegExpMatch)
1106                         return backtrackResult;
1107                 }
1108             }
1109
1110             ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1111             context = backTrack->lastContext;
1112             recordParenthesesMatch(term, context);
1113             return JSRegExpMatch;
1114         }
1115
1116         case QuantifierGreedy: {
1117             if (!backTrack->matchAmount)
1118                 return JSRegExpNoMatch;
1119
1120             ParenthesesDisjunctionContext* context = backTrack->lastContext;
1121             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1122             if (result == JSRegExpMatch) {
1123                 while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1124                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1125                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1126                     if (parenthesesResult == JSRegExpMatch)
1127                         appendParenthesesDisjunctionContext(backTrack, context);
1128                     else {
1129                         resetMatches(term, context);
1130                         freeParenthesesDisjunctionContext(context);
1131
1132                         if (parenthesesResult != JSRegExpNoMatch)
1133                             return parenthesesResult;
1134
1135                         break;
1136                     }
1137                 }
1138             } else {
1139                 resetMatches(term, context);
1140                 popParenthesesDisjunctionContext(backTrack);
1141                 freeParenthesesDisjunctionContext(context);
1142
1143                 if (result != JSRegExpNoMatch || backTrack->matchAmount < term.atom.quantityMinCount)
1144                     return result;
1145             }
1146
1147             if (backTrack->matchAmount) {
1148                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1149                 recordParenthesesMatch(term, context);
1150             }
1151             return JSRegExpMatch;
1152         }
1153
1154         case QuantifierNonGreedy: {
1155             // If we've not reached the limit, try to add one more match.
1156             if (backTrack->matchAmount < term.atom.quantityMaxCount) {
1157                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1158                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1159                 if (result == JSRegExpMatch) {
1160                     appendParenthesesDisjunctionContext(backTrack, context);
1161                     recordParenthesesMatch(term, context);
1162                     return JSRegExpMatch;
1163                 }
1164
1165                 resetMatches(term, context);
1166                 freeParenthesesDisjunctionContext(context);
1167
1168                 if (result != JSRegExpNoMatch)
1169                     return result;
1170             }
1171
1172             // Nope - okay backtrack looking for an alternative.
1173             while (backTrack->matchAmount) {
1174                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1175                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1176                 if (result == JSRegExpMatch) {
1177                     // successful backtrack! we're back in the game!
1178                     if (backTrack->matchAmount) {
1179                         context = backTrack->lastContext;
1180                         recordParenthesesMatch(term, context);
1181                     }
1182                     return JSRegExpMatch;
1183                 }
1184
1185                 // pop a match off the stack
1186                 resetMatches(term, context);
1187                 popParenthesesDisjunctionContext(backTrack);
1188                 freeParenthesesDisjunctionContext(context);
1189
1190                 if (result != JSRegExpNoMatch)
1191                     return result;
1192             }
1193
1194             return JSRegExpNoMatch;
1195         }
1196         }
1197
1198         RELEASE_ASSERT_NOT_REACHED();
1199         return JSRegExpErrorNoMatch;
1200     }
1201
1202     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1203     {
1204         UNUSED_PARAM(term);
1205
1206         if (pattern->dotAll()) {
1207             context->matchBegin = startOffset;
1208             context->matchEnd = input.end();
1209             return true;
1210         }
1211
1212         unsigned matchBegin = context->matchBegin;
1213
1214         if (matchBegin > startOffset) {
1215             for (matchBegin--; true; matchBegin--) {
1216                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1217                     ++matchBegin;
1218                     break;
1219                 }
1220
1221                 if (matchBegin == startOffset)
1222                     break;
1223             }
1224         }
1225
1226         unsigned matchEnd = input.getPos();
1227
1228         for (; (matchEnd != input.end())
1229              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1230
1231         if (((matchBegin && term.anchors.m_bol)
1232              || ((matchEnd != input.end()) && term.anchors.m_eol))
1233             && !pattern->multiline())
1234             return false;
1235
1236         context->matchBegin = matchBegin;
1237         context->matchEnd = matchEnd;
1238         return true;
1239     }
1240
1241 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1242 #define BACKTRACK() { --context->term; goto backtrack; }
1243 #define currentTerm() (disjunction->terms[context->term])
1244     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1245     {
1246         if (!--remainingMatchCount)
1247             return JSRegExpErrorHitLimit;
1248
1249         if (btrack)
1250             BACKTRACK();
1251
1252         context->matchBegin = input.getPos();
1253         context->term = 0;
1254
1255     matchAgain:
1256         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1257
1258         switch (currentTerm().type) {
1259         case ByteTerm::TypeSubpatternBegin:
1260             MATCH_NEXT();
1261         case ByteTerm::TypeSubpatternEnd:
1262             context->matchEnd = input.getPos();
1263             return JSRegExpMatch;
1264
1265         case ByteTerm::TypeBodyAlternativeBegin:
1266             MATCH_NEXT();
1267         case ByteTerm::TypeBodyAlternativeDisjunction:
1268         case ByteTerm::TypeBodyAlternativeEnd:
1269             context->matchEnd = input.getPos();
1270             return JSRegExpMatch;
1271
1272         case ByteTerm::TypeAlternativeBegin:
1273             MATCH_NEXT();
1274         case ByteTerm::TypeAlternativeDisjunction:
1275         case ByteTerm::TypeAlternativeEnd: {
1276             int offset = currentTerm().alternative.end;
1277             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1278             backTrack->offset = offset;
1279             context->term += offset;
1280             MATCH_NEXT();
1281         }
1282
1283         case ByteTerm::TypeAssertionBOL:
1284             if (matchAssertionBOL(currentTerm()))
1285                 MATCH_NEXT();
1286             BACKTRACK();
1287         case ByteTerm::TypeAssertionEOL:
1288             if (matchAssertionEOL(currentTerm()))
1289                 MATCH_NEXT();
1290             BACKTRACK();
1291         case ByteTerm::TypeAssertionWordBoundary:
1292             if (matchAssertionWordBoundary(currentTerm()))
1293                 MATCH_NEXT();
1294             BACKTRACK();
1295
1296         case ByteTerm::TypePatternCharacterOnce:
1297         case ByteTerm::TypePatternCharacterFixed: {
1298             if (unicode) {
1299                 if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
1300                     for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1301                         if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 2 * matchAmount)) {
1302                             BACKTRACK();
1303                         }
1304                     }
1305                     MATCH_NEXT();
1306                 }
1307             }
1308             unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1309
1310             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1311                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
1312                     input.setPos(position);
1313                     BACKTRACK();
1314                 }
1315             }
1316             MATCH_NEXT();
1317         }
1318         case ByteTerm::TypePatternCharacterGreedy: {
1319             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1320             unsigned matchAmount = 0;
1321             unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1322             while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) {
1323                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1324                     input.setPos(position);
1325                     break;
1326                 }
1327                 ++matchAmount;
1328                 position = input.getPos();
1329             }
1330             backTrack->matchAmount = matchAmount;
1331
1332             MATCH_NEXT();
1333         }
1334         case ByteTerm::TypePatternCharacterNonGreedy: {
1335             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1336             backTrack->begin = input.getPos();
1337             backTrack->matchAmount = 0;
1338             MATCH_NEXT();
1339         }
1340
1341         case ByteTerm::TypePatternCasedCharacterOnce:
1342         case ByteTerm::TypePatternCasedCharacterFixed: {
1343             if (unicode) {
1344                 // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1345                 ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
1346
1347                 unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1348                 
1349                 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1350                     if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
1351                         input.setPos(position);
1352                         BACKTRACK();
1353                     }
1354                 }
1355                 MATCH_NEXT();
1356             }
1357
1358             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1359                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1360                     BACKTRACK();
1361             }
1362             MATCH_NEXT();
1363         }
1364         case ByteTerm::TypePatternCasedCharacterGreedy: {
1365             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1366
1367             // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1368             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
1369
1370             unsigned matchAmount = 0;
1371             while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) {
1372                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1373                     input.uncheckInput(1);
1374                     break;
1375                 }
1376                 ++matchAmount;
1377             }
1378             backTrack->matchAmount = matchAmount;
1379
1380             MATCH_NEXT();
1381         }
1382         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1383             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1384
1385             // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1386             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
1387             
1388             backTrack->matchAmount = 0;
1389             MATCH_NEXT();
1390         }
1391
1392         case ByteTerm::TypeCharacterClass:
1393             if (matchCharacterClass(currentTerm(), context))
1394                 MATCH_NEXT();
1395             BACKTRACK();
1396         case ByteTerm::TypeBackReference:
1397             if (matchBackReference(currentTerm(), context))
1398                 MATCH_NEXT();
1399             BACKTRACK();
1400         case ByteTerm::TypeParenthesesSubpattern: {
1401             JSRegExpResult result = matchParentheses(currentTerm(), context);
1402
1403             if (result == JSRegExpMatch) {
1404                 MATCH_NEXT();
1405             }  else if (result != JSRegExpNoMatch)
1406                 return result;
1407
1408             BACKTRACK();
1409         }
1410         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1411             if (matchParenthesesOnceBegin(currentTerm(), context))
1412                 MATCH_NEXT();
1413             BACKTRACK();
1414         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1415             if (matchParenthesesOnceEnd(currentTerm(), context))
1416                 MATCH_NEXT();
1417             BACKTRACK();
1418         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1419             if (matchParenthesesTerminalBegin(currentTerm(), context))
1420                 MATCH_NEXT();
1421             BACKTRACK();
1422         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1423             if (matchParenthesesTerminalEnd(currentTerm(), context))
1424                 MATCH_NEXT();
1425             BACKTRACK();
1426         case ByteTerm::TypeParentheticalAssertionBegin:
1427             if (matchParentheticalAssertionBegin(currentTerm(), context))
1428                 MATCH_NEXT();
1429             BACKTRACK();
1430         case ByteTerm::TypeParentheticalAssertionEnd:
1431             if (matchParentheticalAssertionEnd(currentTerm(), context))
1432                 MATCH_NEXT();
1433             BACKTRACK();
1434
1435         case ByteTerm::TypeCheckInput:
1436             if (input.checkInput(currentTerm().checkInputCount))
1437                 MATCH_NEXT();
1438             BACKTRACK();
1439
1440         case ByteTerm::TypeUncheckInput:
1441             input.uncheckInput(currentTerm().checkInputCount);
1442             MATCH_NEXT();
1443                 
1444         case ByteTerm::TypeDotStarEnclosure:
1445             if (matchDotStarEnclosure(currentTerm(), context))
1446                 return JSRegExpMatch;
1447             BACKTRACK();
1448         }
1449
1450         // We should never fall-through to here.
1451         RELEASE_ASSERT_NOT_REACHED();
1452
1453     backtrack:
1454         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1455
1456         switch (currentTerm().type) {
1457         case ByteTerm::TypeSubpatternBegin:
1458             return JSRegExpNoMatch;
1459         case ByteTerm::TypeSubpatternEnd:
1460             RELEASE_ASSERT_NOT_REACHED();
1461
1462         case ByteTerm::TypeBodyAlternativeBegin:
1463         case ByteTerm::TypeBodyAlternativeDisjunction: {
1464             int offset = currentTerm().alternative.next;
1465             context->term += offset;
1466             if (offset > 0)
1467                 MATCH_NEXT();
1468
1469             if (input.atEnd() || pattern->sticky())
1470                 return JSRegExpNoMatch;
1471
1472             input.next();
1473
1474             context->matchBegin = input.getPos();
1475
1476             if (currentTerm().alternative.onceThrough)
1477                 context->term += currentTerm().alternative.next;
1478
1479             MATCH_NEXT();
1480         }
1481         case ByteTerm::TypeBodyAlternativeEnd:
1482             RELEASE_ASSERT_NOT_REACHED();
1483
1484         case ByteTerm::TypeAlternativeBegin:
1485         case ByteTerm::TypeAlternativeDisjunction: {
1486             int offset = currentTerm().alternative.next;
1487             context->term += offset;
1488             if (offset > 0)
1489                 MATCH_NEXT();
1490             BACKTRACK();
1491         }
1492         case ByteTerm::TypeAlternativeEnd: {
1493             // We should never backtrack back into an alternative of the main body of the regex.
1494             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1495             unsigned offset = backTrack->offset;
1496             context->term -= offset;
1497             BACKTRACK();
1498         }
1499
1500         case ByteTerm::TypeAssertionBOL:
1501         case ByteTerm::TypeAssertionEOL:
1502         case ByteTerm::TypeAssertionWordBoundary:
1503             BACKTRACK();
1504
1505         case ByteTerm::TypePatternCharacterOnce:
1506         case ByteTerm::TypePatternCharacterFixed:
1507         case ByteTerm::TypePatternCharacterGreedy:
1508         case ByteTerm::TypePatternCharacterNonGreedy:
1509             if (backtrackPatternCharacter(currentTerm(), context))
1510                 MATCH_NEXT();
1511             BACKTRACK();
1512         case ByteTerm::TypePatternCasedCharacterOnce:
1513         case ByteTerm::TypePatternCasedCharacterFixed:
1514         case ByteTerm::TypePatternCasedCharacterGreedy:
1515         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1516             if (backtrackPatternCasedCharacter(currentTerm(), context))
1517                 MATCH_NEXT();
1518             BACKTRACK();
1519         case ByteTerm::TypeCharacterClass:
1520             if (backtrackCharacterClass(currentTerm(), context))
1521                 MATCH_NEXT();
1522             BACKTRACK();
1523         case ByteTerm::TypeBackReference:
1524             if (backtrackBackReference(currentTerm(), context))
1525                 MATCH_NEXT();
1526             BACKTRACK();
1527         case ByteTerm::TypeParenthesesSubpattern: {
1528             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1529
1530             if (result == JSRegExpMatch) {
1531                 MATCH_NEXT();
1532             } else if (result != JSRegExpNoMatch)
1533                 return result;
1534
1535             BACKTRACK();
1536         }
1537         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1538             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1539                 MATCH_NEXT();
1540             BACKTRACK();
1541         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1542             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1543                 MATCH_NEXT();
1544             BACKTRACK();
1545         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1546             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1547                 MATCH_NEXT();
1548             BACKTRACK();
1549         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1550             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1551                 MATCH_NEXT();
1552             BACKTRACK();
1553         case ByteTerm::TypeParentheticalAssertionBegin:
1554             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1555                 MATCH_NEXT();
1556             BACKTRACK();
1557         case ByteTerm::TypeParentheticalAssertionEnd:
1558             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1559                 MATCH_NEXT();
1560             BACKTRACK();
1561
1562         case ByteTerm::TypeCheckInput:
1563             input.uncheckInput(currentTerm().checkInputCount);
1564             BACKTRACK();
1565
1566         case ByteTerm::TypeUncheckInput:
1567             input.checkInput(currentTerm().checkInputCount);
1568             BACKTRACK();
1569
1570         case ByteTerm::TypeDotStarEnclosure:
1571             RELEASE_ASSERT_NOT_REACHED();
1572         }
1573
1574         RELEASE_ASSERT_NOT_REACHED();
1575         return JSRegExpErrorNoMatch;
1576     }
1577
1578     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1579     {
1580         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1581
1582         if (result == JSRegExpMatch) {
1583             while (context->matchBegin == context->matchEnd) {
1584                 result = matchDisjunction(disjunction, context, true);
1585                 if (result != JSRegExpMatch)
1586                     return result;
1587             }
1588             return JSRegExpMatch;
1589         }
1590
1591         return result;
1592     }
1593
1594     unsigned interpret()
1595     {
1596         if (!input.isAvailableInput(0))
1597             return offsetNoMatch;
1598
1599         if (pattern->m_lock)
1600             pattern->m_lock->lock();
1601         
1602         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1603             output[i << 1] = offsetNoMatch;
1604
1605         allocatorPool = pattern->m_allocator->startAllocator();
1606         RELEASE_ASSERT(allocatorPool);
1607
1608         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1609
1610         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1611         if (result == JSRegExpMatch) {
1612             output[0] = context->matchBegin;
1613             output[1] = context->matchEnd;
1614         }
1615
1616         freeDisjunctionContext(context);
1617
1618         pattern->m_allocator->stopAllocator();
1619
1620         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1621
1622         if (pattern->m_lock)
1623             pattern->m_lock->unlock();
1624         
1625         return output[0];
1626     }
1627
1628     Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1629         : pattern(pattern)
1630         , unicode(pattern->unicode())
1631         , output(output)
1632         , input(input, start, length, pattern->unicode())
1633         , allocatorPool(0)
1634         , startOffset(start)
1635         , remainingMatchCount(matchLimit)
1636     {
1637     }
1638
1639 private:
1640     BytecodePattern* pattern;
1641     bool unicode;
1642     unsigned* output;
1643     InputStream input;
1644     BumpPointerPool* allocatorPool;
1645     unsigned startOffset;
1646     unsigned remainingMatchCount;
1647 };
1648
1649 class ByteCompiler {
1650     struct ParenthesesStackEntry {
1651         unsigned beginTerm;
1652         unsigned savedAlternativeIndex;
1653         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1654             : beginTerm(beginTerm)
1655             , savedAlternativeIndex(savedAlternativeIndex)
1656         {
1657         }
1658     };
1659
1660 public:
1661     ByteCompiler(YarrPattern& pattern)
1662         : m_pattern(pattern)
1663     {
1664         m_currentAlternativeIndex = 0;
1665     }
1666
1667     std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
1668     {
1669         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1670         emitDisjunction(m_pattern.m_body);
1671         regexEnd();
1672
1673 #ifndef NDEBUG
1674         if (Options::dumpCompiledRegExpPatterns())
1675             dumpDisjunction(m_bodyDisjunction.get());
1676 #endif
1677
1678         return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
1679     }
1680
1681     void checkInput(unsigned count)
1682     {
1683         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1684     }
1685
1686     void uncheckInput(unsigned count)
1687     {
1688         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1689     }
1690     
1691     void assertionBOL(unsigned inputPosition)
1692     {
1693         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1694     }
1695
1696     void assertionEOL(unsigned inputPosition)
1697     {
1698         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1699     }
1700
1701     void assertionWordBoundary(bool invert, unsigned inputPosition)
1702     {
1703         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1704     }
1705
1706     void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1707     {
1708         if (m_pattern.ignoreCase()) {
1709             UChar32 lo = u_tolower(ch);
1710             UChar32 hi = u_toupper(ch);
1711
1712             if (lo != hi) {
1713                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType));
1714                 return;
1715             }
1716         }
1717
1718         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityMaxCount, quantityType));
1719     }
1720
1721     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1722     {
1723         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1724
1725         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1726         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1727         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1728     }
1729
1730     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1731     {
1732         ASSERT(subpatternId);
1733
1734         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1735
1736         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1737         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1738         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1739     }
1740
1741     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1742     {
1743         int beginTerm = m_bodyDisjunction->terms.size();
1744
1745         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1746         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1747         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1748         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1749
1750         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1751         m_currentAlternativeIndex = beginTerm + 1;
1752     }
1753
1754     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1755     {
1756         int beginTerm = m_bodyDisjunction->terms.size();
1757
1758         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1759         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1760         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1761         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1762
1763         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1764         m_currentAlternativeIndex = beginTerm + 1;
1765     }
1766
1767     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1768     {
1769         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1770         // then fix this up at the end! - simplifying this should make it much clearer.
1771         // https://bugs.webkit.org/show_bug.cgi?id=50136
1772
1773         int beginTerm = m_bodyDisjunction->terms.size();
1774
1775         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1776         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1777         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1778         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1779
1780         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1781         m_currentAlternativeIndex = beginTerm + 1;
1782     }
1783
1784     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1785     {
1786         int beginTerm = m_bodyDisjunction->terms.size();
1787
1788         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1789         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1790         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1791         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1792
1793         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1794         m_currentAlternativeIndex = beginTerm + 1;
1795     }
1796
1797     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1798     {
1799         unsigned beginTerm = popParenthesesStack();
1800         closeAlternative(beginTerm + 1);
1801         unsigned endTerm = m_bodyDisjunction->terms.size();
1802
1803         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1804
1805         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1806         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1807
1808         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1809         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1810         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1811         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1812
1813         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1814         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1815         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1816         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1817     }
1818
1819     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1820     {
1821         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1822     }
1823
1824     unsigned popParenthesesStack()
1825     {
1826         ASSERT(m_parenthesesStack.size());
1827         int stackEnd = m_parenthesesStack.size() - 1;
1828         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1829         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1830         m_parenthesesStack.shrink(stackEnd);
1831
1832         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1833         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1834
1835         return beginTerm;
1836     }
1837
1838     void closeAlternative(int beginTerm)
1839     {
1840         int origBeginTerm = beginTerm;
1841         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1842         int endIndex = m_bodyDisjunction->terms.size();
1843
1844         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1845
1846         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1847             m_bodyDisjunction->terms.remove(beginTerm);
1848         else {
1849             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1850                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1851                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1852                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1853                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1854             }
1855
1856             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1857
1858             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1859             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1860         }
1861     }
1862
1863     void closeBodyAlternative()
1864     {
1865         int beginTerm = 0;
1866         int origBeginTerm = 0;
1867         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1868         int endIndex = m_bodyDisjunction->terms.size();
1869
1870         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1871
1872         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1873             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1874             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1875             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1876             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1877         }
1878
1879         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1880
1881         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1882         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1883     }
1884
1885     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1886     {
1887         unsigned beginTerm = popParenthesesStack();
1888         closeAlternative(beginTerm + 1);
1889         unsigned endTerm = m_bodyDisjunction->terms.size();
1890
1891         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1892
1893         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1894
1895         bool capture = parenthesesBegin.capture();
1896         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1897
1898         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1899         auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
1900
1901         unsigned firstTermInParentheses = beginTerm + 1;
1902         parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
1903
1904         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1905         for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1906             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1907         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1908
1909         m_bodyDisjunction->terms.shrink(beginTerm);
1910
1911         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1912         m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
1913
1914         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1915         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1916         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1917         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1918     }
1919
1920     void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1921     {
1922         unsigned beginTerm = popParenthesesStack();
1923         closeAlternative(beginTerm + 1);
1924         unsigned endTerm = m_bodyDisjunction->terms.size();
1925
1926         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1927
1928         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1929         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1930
1931         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1932         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1933         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1934         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1935
1936         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1937         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1938         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1939         m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1940         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1941         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1942     }
1943
1944     void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1945     {
1946         unsigned beginTerm = popParenthesesStack();
1947         closeAlternative(beginTerm + 1);
1948         unsigned endTerm = m_bodyDisjunction->terms.size();
1949
1950         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1951
1952         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1953         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1954
1955         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1956         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1957         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1958         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1959
1960         m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1961         m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1962         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1963         m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1964         m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1965         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1966     }
1967
1968     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1969     {
1970         m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
1971         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1972         m_bodyDisjunction->terms[0].frameLocation = 0;
1973         m_currentAlternativeIndex = 0;
1974     }
1975
1976     void regexEnd()
1977     {
1978         closeBodyAlternative();
1979     }
1980
1981     void alternativeBodyDisjunction(bool onceThrough)
1982     {
1983         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1984         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1985         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1986
1987         m_currentAlternativeIndex = newAlternativeIndex;
1988     }
1989
1990     void alternativeDisjunction()
1991     {
1992         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1993         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1994         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1995
1996         m_currentAlternativeIndex = newAlternativeIndex;
1997     }
1998
1999     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
2000     {
2001         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
2002             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
2003
2004             PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
2005
2006             if (alt) {
2007                 if (disjunction == m_pattern.m_body)
2008                     alternativeBodyDisjunction(alternative->onceThrough());
2009                 else
2010                     alternativeDisjunction();
2011             }
2012
2013             unsigned minimumSize = alternative->m_minimumSize;
2014             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
2015             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
2016
2017             if (countToCheck) {
2018                 checkInput(countToCheck);
2019                 currentCountAlreadyChecked += countToCheck;
2020             }
2021
2022             for (auto& term : alternative->m_terms) {
2023                 switch (term.type) {
2024                 case PatternTerm::TypeAssertionBOL:
2025                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
2026                     break;
2027
2028                 case PatternTerm::TypeAssertionEOL:
2029                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
2030                     break;
2031
2032                 case PatternTerm::TypeAssertionWordBoundary:
2033                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
2034                     break;
2035
2036                 case PatternTerm::TypePatternCharacter:
2037                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2038                     break;
2039
2040                 case PatternTerm::TypeCharacterClass:
2041                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2042                     break;
2043
2044                 case PatternTerm::TypeBackReference:
2045                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2046                         break;
2047
2048                 case PatternTerm::TypeForwardReference:
2049                     break;
2050
2051                 case PatternTerm::TypeParenthesesSubpattern: {
2052                     unsigned disjunctionAlreadyCheckedCount = 0;
2053                     if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) {
2054                         unsigned alternativeFrameLocation = term.frameLocation;
2055                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
2056                         if (term.quantityType == QuantifierFixedCount)
2057                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
2058                         else
2059                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2060                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2061                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2062                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
2063                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
2064                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2065                     } else if (term.parentheses.isTerminal) {
2066                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2067                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2068                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
2069                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
2070                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2071                     } else {
2072                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2073                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2074                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, 0);
2075                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
2076                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
2077                     }
2078                     break;
2079                 }
2080
2081                 case PatternTerm::TypeParentheticalAssertion: {
2082                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
2083
2084                     ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2085                     unsigned positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
2086                     unsigned uncheckAmount = 0;
2087                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
2088                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
2089                         uncheckInput(uncheckAmount);
2090                         currentCountAlreadyChecked -= uncheckAmount;
2091                     }
2092
2093                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
2094                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
2095                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityMaxCount, term.quantityType);
2096                     if (uncheckAmount) {
2097                         checkInput(uncheckAmount);
2098                         currentCountAlreadyChecked += uncheckAmount;
2099                     }
2100                     break;
2101                 }
2102
2103                 case PatternTerm::TypeDotStarEnclosure:
2104                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
2105                     break;
2106                 }
2107             }
2108         }
2109     }
2110 #ifndef NDEBUG
2111     void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = 0)
2112     {
2113         PrintStream& out = WTF::dataFile();
2114
2115         unsigned termIndexNest = 0;
2116
2117         if (!nesting) {
2118             out.printf("ByteDisjunction(%p):\n", disjunction);
2119             nesting = 1;
2120         } else {
2121             termIndexNest = nesting - 1;
2122             nesting = 2;
2123         }
2124
2125         auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) {
2126             for (unsigned nestingDepth = 0; nestingDepth < termIndexNest; nestingDepth++)
2127                 out.print("  ");
2128             out.printf("%4lu", index);
2129             for (unsigned nestingDepth = 0; nestingDepth < termNesting; nestingDepth++)
2130                 out.print("  ");
2131         };
2132
2133         auto dumpQuantity = [&](ByteTerm& term) {
2134             if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == 1 && term.atom.quantityMaxCount == 1)
2135                 return;
2136
2137             out.print(" {", term.atom.quantityMinCount);
2138             if (term.atom.quantityMinCount != term.atom.quantityMaxCount) {
2139                 if (term.atom.quantityMaxCount == UINT_MAX)
2140                     out.print(",inf");
2141                 else
2142                     out.print(",", term.atom.quantityMaxCount);
2143             }
2144             out.print("}");
2145             if (term.atom.quantityType == QuantifierGreedy)
2146                 out.print(" greedy");
2147             else if (term.atom.quantityType == QuantifierNonGreedy)
2148                 out.print(" non-greedy");
2149         };
2150
2151         auto dumpCaptured = [&](ByteTerm& term) {
2152             if (term.capture())
2153                 out.print(" captured (#", term.atom.subpatternId, ")");
2154         };
2155
2156         auto dumpInverted = [&](ByteTerm& term) {
2157             if (term.invert())
2158                 out.print(" inverted");
2159         };
2160
2161         auto dumpInputPosition = [&](ByteTerm& term) {
2162             out.printf(" inputPosition %u", term.inputPosition);
2163         };
2164
2165         auto dumpCharacter = [&](ByteTerm& term) {
2166             out.print(" ");
2167             dumpUChar32(out, term.atom.patternCharacter);
2168         };
2169
2170         auto dumpCharClass = [&](ByteTerm& term) {
2171             out.print(" ");
2172             dumpCharacterClass(out, &m_pattern, term.atom.characterClass);
2173         };
2174
2175         for (size_t idx = 0; idx < disjunction->terms.size(); ++idx) {
2176             ByteTerm term = disjunction->terms[idx];
2177
2178             bool outputNewline = true;
2179
2180             switch (term.type) {
2181             case ByteTerm::TypeBodyAlternativeBegin:
2182                 outputTermIndexAndNest(idx, nesting++);
2183                 out.print("BodyAlternativeBegin");
2184                 if (term.alternative.onceThrough)
2185                     out.print(" onceThrough");
2186                 break;
2187             case ByteTerm::TypeBodyAlternativeDisjunction:
2188                 outputTermIndexAndNest(idx, nesting - 1);
2189                 out.print("BodyAlternativeDisjunction");
2190                 break;
2191             case ByteTerm::TypeBodyAlternativeEnd:
2192                 outputTermIndexAndNest(idx, --nesting);
2193                 out.print("BodyAlternativeEnd");
2194                 break;
2195             case ByteTerm::TypeAlternativeBegin:
2196                 outputTermIndexAndNest(idx, nesting++);
2197                 out.print("AlternativeBegin");
2198                 break;
2199             case ByteTerm::TypeAlternativeDisjunction:
2200                 outputTermIndexAndNest(idx, nesting - 1);
2201                 out.print("AlternativeDisjunction");
2202                 break;
2203             case ByteTerm::TypeAlternativeEnd:
2204                 outputTermIndexAndNest(idx, --nesting);
2205                 out.print("AlternativeEnd");
2206                 break;
2207             case ByteTerm::TypeSubpatternBegin:
2208                 outputTermIndexAndNest(idx, nesting++);
2209                 out.print("SubpatternBegin");
2210                 break;
2211             case ByteTerm::TypeSubpatternEnd:
2212                 outputTermIndexAndNest(idx, --nesting);
2213                 out.print("SubpatternEnd");
2214                 break;
2215             case ByteTerm::TypeAssertionBOL:
2216                 outputTermIndexAndNest(idx, nesting);
2217                 out.print("AssertionBOL");
2218                 break;
2219             case ByteTerm::TypeAssertionEOL:
2220                 outputTermIndexAndNest(idx, nesting);
2221                 out.print("AssertionEOL");
2222                 break;
2223             case ByteTerm::TypeAssertionWordBoundary:
2224                 outputTermIndexAndNest(idx, nesting);
2225                 out.print("AssertionWordBoundary");
2226                 break;
2227             case ByteTerm::TypePatternCharacterOnce:
2228                 outputTermIndexAndNest(idx, nesting);
2229                 out.print("PatternCharacterOnce");
2230                 dumpInverted(term);
2231                 dumpInputPosition(term);
2232                 dumpCharacter(term);
2233                 dumpQuantity(term);
2234                 break;
2235             case ByteTerm::TypePatternCharacterFixed:
2236                 outputTermIndexAndNest(idx, nesting);
2237                 out.print("PatternCharacterFixed");
2238                 dumpInverted(term);
2239                 dumpInputPosition(term);
2240                 dumpCharacter(term);
2241                 out.print(" {", term.atom.quantityMinCount, "}");
2242                 break;
2243             case ByteTerm::TypePatternCharacterGreedy:
2244                 outputTermIndexAndNest(idx, nesting);
2245                 out.print("PatternCharacterGreedy");
2246                 dumpInverted(term);
2247                 dumpInputPosition(term);
2248                 dumpCharacter(term);
2249                 dumpQuantity(term);
2250                 break;
2251             case ByteTerm::TypePatternCharacterNonGreedy:
2252                 outputTermIndexAndNest(idx, nesting);
2253                 out.print("PatternCharacterNonGreedy");
2254                 dumpInverted(term);
2255                 dumpInputPosition(term);
2256                 dumpCharacter(term);
2257                 dumpQuantity(term);
2258                 break;
2259             case ByteTerm::TypePatternCasedCharacterOnce:
2260                 outputTermIndexAndNest(idx, nesting);
2261                 out.print("PatternCasedCharacterOnce");
2262                 break;
2263             case ByteTerm::TypePatternCasedCharacterFixed:
2264                 outputTermIndexAndNest(idx, nesting);
2265                 out.print("PatternCasedCharacterFixed");
2266                 break;
2267             case ByteTerm::TypePatternCasedCharacterGreedy:
2268                 outputTermIndexAndNest(idx, nesting);
2269                 out.print("PatternCasedCharacterGreedy");
2270                 break;
2271             case ByteTerm::TypePatternCasedCharacterNonGreedy:
2272                 outputTermIndexAndNest(idx, nesting);
2273                 out.print("PatternCasedCharacterNonGreedy");
2274                 break;
2275             case ByteTerm::TypeCharacterClass:
2276                 outputTermIndexAndNest(idx, nesting);
2277                 out.print("CharacterClass");
2278                 dumpInverted(term);
2279                 dumpInputPosition(term);
2280                 dumpCharClass(term);
2281                 dumpQuantity(term);
2282                 break;
2283             case ByteTerm::TypeBackReference:
2284                 outputTermIndexAndNest(idx, nesting);
2285                 out.print("BackReference #", term.atom.subpatternId);
2286                 dumpQuantity(term);
2287                 break;
2288             case ByteTerm::TypeParenthesesSubpattern:
2289                 outputTermIndexAndNest(idx, nesting);
2290                 out.print("ParenthesesSubpattern");
2291                 dumpCaptured(term);
2292                 dumpInverted(term);
2293                 dumpInputPosition(term);
2294                 dumpQuantity(term);
2295                 out.print("\n");
2296                 outputNewline = false;
2297                 dumpDisjunction(term.atom.parenthesesDisjunction, nesting);
2298                 break;
2299             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
2300                 outputTermIndexAndNest(idx, nesting++);
2301                 out.print("ParenthesesSubpatternOnceBegin");
2302                 dumpCaptured(term);
2303                 dumpInverted(term);
2304                 dumpInputPosition(term);
2305                 break;
2306             case ByteTerm::TypeParenthesesSubpatternOnceEnd:
2307                 outputTermIndexAndNest(idx, --nesting);
2308                 out.print("ParenthesesSubpatternOnceEnd");
2309                 break;
2310             case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
2311                 outputTermIndexAndNest(idx, nesting++);
2312                 out.print("ParenthesesSubpatternTerminalBegin");
2313                 dumpInverted(term);
2314                 dumpInputPosition(term);
2315                 break;
2316             case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
2317                 outputTermIndexAndNest(idx, --nesting);
2318                 out.print("ParenthesesSubpatternTerminalEnd");
2319                 break;
2320             case ByteTerm::TypeParentheticalAssertionBegin:
2321                 outputTermIndexAndNest(idx, nesting++);
2322                 out.print("ParentheticalAssertionBegin");
2323                 dumpInverted(term);
2324                 dumpInputPosition(term);
2325                 break;
2326             case ByteTerm::TypeParentheticalAssertionEnd:
2327                 outputTermIndexAndNest(idx, --nesting);
2328                 out.print("ParentheticalAssertionEnd");
2329                 break;
2330             case ByteTerm::TypeCheckInput:
2331                 outputTermIndexAndNest(idx, nesting);
2332                 out.print("CheckInput ", term.checkInputCount);
2333                 break;
2334             case ByteTerm::TypeUncheckInput:
2335                 outputTermIndexAndNest(idx, nesting);
2336                 out.print("UncheckInput ", term.checkInputCount);
2337                 break;
2338             case ByteTerm::TypeDotStarEnclosure:
2339                 outputTermIndexAndNest(idx, nesting);
2340                 out.print("DotStarEnclosure");
2341                 break;
2342             }
2343             if (outputNewline)
2344                 out.print("\n");
2345         }
2346     }
2347 #endif
2348
2349 private:
2350     YarrPattern& m_pattern;
2351     std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
2352     unsigned m_currentAlternativeIndex;
2353     Vector<ParenthesesStackEntry> m_parenthesesStack;
2354     Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
2355 };
2356
2357 std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
2358 {
2359     return ByteCompiler(pattern).compile(allocator, lock);
2360 }
2361
2362 unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
2363 {
2364     SuperSamplerScope superSamplerScope(false);
2365     if (input.is8Bit())
2366         return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
2367     return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
2368 }
2369
2370 unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
2371 {
2372     SuperSamplerScope superSamplerScope(false);
2373     return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
2374 }
2375
2376 unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
2377 {
2378     SuperSamplerScope superSamplerScope(false);
2379     return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
2380 }
2381
2382 // These should be the same for both UChar & LChar.
2383 COMPILE_ASSERT(sizeof(BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
2384 COMPILE_ASSERT(sizeof(BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
2385 COMPILE_ASSERT(sizeof(BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
2386 COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
2387 COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
2388 COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
2389 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
2390
2391
2392 } }