aeca421cc2c1f7445cb6d6b36434e02bf25ffb1b
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrInterpreter.cpp
1 /*
2  * Copyright (C) 2009, 2013, 2016 Apple Inc. All rights reserved.
3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "YarrInterpreter.h"
29
30 #include "SuperSampler.h"
31 #include "Yarr.h"
32 #include "YarrCanonicalize.h"
33 #include <wtf/BumpPointerAllocator.h>
34 #include <wtf/DataLog.h>
35 #include <wtf/text/CString.h>
36 #include <wtf/text/WTFString.h>
37
38 using namespace WTF;
39
40 namespace JSC { namespace Yarr {
41
42 template<typename CharType>
43 class Interpreter {
44 public:
45     struct ParenthesesDisjunctionContext;
46
47     struct BackTrackInfoPatternCharacter {
48         uintptr_t begin; // Only needed for unicode patterns
49         uintptr_t matchAmount;
50     };
51     struct BackTrackInfoCharacterClass {
52         uintptr_t begin; // Only needed for unicode patterns
53         uintptr_t matchAmount;
54     };
55     struct BackTrackInfoBackReference {
56         uintptr_t begin; // Not really needed for greedy quantifiers.
57         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
58     };
59     struct BackTrackInfoAlternative {
60         uintptr_t offset;
61     };
62     struct BackTrackInfoParentheticalAssertion {
63         uintptr_t begin;
64     };
65     struct BackTrackInfoParenthesesOnce {
66         uintptr_t begin;
67     };
68     struct BackTrackInfoParenthesesTerminal {
69         uintptr_t begin;
70     };
71     struct BackTrackInfoParentheses {
72         uintptr_t matchAmount;
73         ParenthesesDisjunctionContext* lastContext;
74     };
75
76     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
77     {
78         context->next = backTrack->lastContext;
79         backTrack->lastContext = context;
80         ++backTrack->matchAmount;
81     }
82
83     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
84     {
85         RELEASE_ASSERT(backTrack->matchAmount);
86         RELEASE_ASSERT(backTrack->lastContext);
87         backTrack->lastContext = backTrack->lastContext->next;
88         --backTrack->matchAmount;
89     }
90
91     struct DisjunctionContext
92     {
93         DisjunctionContext()
94             : term(0)
95         {
96         }
97
98         void* operator new(size_t, void* where)
99         {
100             return where;
101         }
102
103         int term;
104         unsigned matchBegin;
105         unsigned matchEnd;
106         uintptr_t frame[1];
107     };
108
109     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
110     {
111         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
112         allocatorPool = allocatorPool->ensureCapacity(size);
113         RELEASE_ASSERT(allocatorPool);
114         return new (allocatorPool->alloc(size)) DisjunctionContext();
115     }
116
117     void freeDisjunctionContext(DisjunctionContext* context)
118     {
119         allocatorPool = allocatorPool->dealloc(context);
120     }
121
122     struct ParenthesesDisjunctionContext
123     {
124         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
125             : next(0)
126         {
127             unsigned firstSubpatternId = term.atom.subpatternId;
128             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
129
130             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
131                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
132                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
133             }
134
135             new (getDisjunctionContext(term)) DisjunctionContext();
136         }
137
138         void* operator new(size_t, void* where)
139         {
140             return where;
141         }
142
143         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
144         {
145             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
146                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
147         }
148
149         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
150         {
151             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
152         }
153
154         ParenthesesDisjunctionContext* next;
155         unsigned subpatternBackup[1];
156     };
157
158     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
159     {
160         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t);
161         allocatorPool = allocatorPool->ensureCapacity(size);
162         RELEASE_ASSERT(allocatorPool);
163         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
164     }
165
166     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
167     {
168         allocatorPool = allocatorPool->dealloc(context);
169     }
170
171     class InputStream {
172     public:
173         InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
174             : input(input)
175             , pos(start)
176             , length(length)
177             , decodeSurrogatePairs(decodeSurrogatePairs)
178         {
179         }
180
181         void next()
182         {
183             ++pos;
184         }
185
186         void rewind(unsigned amount)
187         {
188             ASSERT(pos >= amount);
189             pos -= amount;
190         }
191
192         int read()
193         {
194             ASSERT(pos < length);
195             if (pos < length)
196                 return input[pos];
197             return -1;
198         }
199
200         int readPair()
201         {
202             ASSERT(pos + 1 < length);
203             return input[pos] | input[pos + 1] << 16;
204         }
205
206         int readChecked(unsigned negativePositionOffest)
207         {
208             RELEASE_ASSERT(pos >= negativePositionOffest);
209             unsigned p = pos - negativePositionOffest;
210             ASSERT(p < length);
211             int result = input[p];
212             if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
213                 if (atEnd())
214                     return -1;
215                 
216                 result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
217                 next();
218             }
219             return result;
220         }
221         
222         int readSurrogatePairChecked(unsigned negativePositionOffset)
223         {
224             RELEASE_ASSERT(pos >= negativePositionOffset);
225             unsigned p = pos - negativePositionOffset;
226             ASSERT(p < length);
227             if (p + 1 >= length)
228                 return -1;
229
230             int first = input[p];
231             int second = input[p + 1];
232             if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
233                 return U16_GET_SUPPLEMENTARY(first, second);
234
235             return -1;
236         }
237
238         int reread(unsigned from)
239         {
240             ASSERT(from < length);
241             int result = input[from];
242             if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1]))
243                 result = U16_GET_SUPPLEMENTARY(result, input[from + 1]);
244             return result;
245         }
246
247         int prev()
248         {
249             ASSERT(!(pos > length));
250             if (pos && length)
251                 return input[pos - 1];
252             return -1;
253         }
254
255         unsigned getPos()
256         {
257             return pos;
258         }
259
260         void setPos(unsigned p)
261         {
262             pos = p;
263         }
264
265         bool atStart()
266         {
267             return pos == 0;
268         }
269
270         bool atEnd()
271         {
272             return pos == length;
273         }
274
275         unsigned end()
276         {
277             return length;
278         }
279
280         bool checkInput(unsigned count)
281         {
282             if (((pos + count) <= length) && ((pos + count) >= pos)) {
283                 pos += count;
284                 return true;
285             }
286             return false;
287         }
288
289         void uncheckInput(unsigned count)
290         {
291             RELEASE_ASSERT(pos >= count);
292             pos -= count;
293         }
294
295         bool atStart(unsigned negativePositionOffset)
296         {
297             return pos == negativePositionOffset;
298         }
299
300         bool atEnd(unsigned negativePositionOffest)
301         {
302             RELEASE_ASSERT(pos >= negativePositionOffest);
303             return (pos - negativePositionOffest) == length;
304         }
305
306         bool isAvailableInput(unsigned offset)
307         {
308             return (((pos + offset) <= length) && ((pos + offset) >= pos));
309         }
310
311     private:
312         const CharType* input;
313         unsigned pos;
314         unsigned length;
315         bool decodeSurrogatePairs;
316     };
317
318     bool testCharacterClass(CharacterClass* characterClass, int ch)
319     {
320         if (!isASCII(ch)) {
321             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
322                 if (ch == characterClass->m_matchesUnicode[i])
323                     return true;
324             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
325                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
326                     return true;
327         } else {
328             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
329                 if (ch == characterClass->m_matches[i])
330                     return true;
331             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
332                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
333                     return true;
334         }
335
336         return false;
337     }
338
339     bool checkCharacter(int testChar, unsigned negativeInputOffset)
340     {
341         return testChar == input.readChecked(negativeInputOffset);
342     }
343
344     bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
345     {
346         return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
347     }
348
349     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
350     {
351         int ch = input.readChecked(negativeInputOffset);
352         return (loChar == ch) || (hiChar == ch);
353     }
354
355     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
356     {
357         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
358         return invert ? !match : match;
359     }
360
361     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
362     {
363         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
364
365         if (!input.checkInput(matchSize))
366             return false;
367
368         for (unsigned i = 0; i < matchSize; ++i) {
369             int oldCh = input.reread(matchBegin + i);
370             int ch;
371             if (!U_IS_BMP(oldCh)) {
372                 ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
373                 ++i;
374             } else
375                 ch = input.readChecked(negativeInputOffset + matchSize - i);
376
377             if (oldCh == ch)
378                 continue;
379
380             if (pattern->ignoreCase()) {
381                 // See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
382                 // patterns, Unicode values are never allowed to match against ASCII ones.
383                 // For Unicode, we need to check all canonical equivalents of a character.
384                 if (!unicode && (isASCII(oldCh) || isASCII(ch))) {
385                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
386                         continue;
387                 } else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
388                     continue;
389             }
390
391             input.uncheckInput(matchSize);
392             return false;
393         }
394
395         return true;
396     }
397
398     bool matchAssertionBOL(ByteTerm& term)
399     {
400         return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
401     }
402
403     bool matchAssertionEOL(ByteTerm& term)
404     {
405         if (term.inputPosition)
406             return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
407
408         return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
409     }
410
411     bool matchAssertionWordBoundary(ByteTerm& term)
412     {
413         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
414         bool readIsWordchar;
415         if (term.inputPosition)
416             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
417         else
418             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
419
420         bool wordBoundary = prevIsWordchar != readIsWordchar;
421         return term.invert() ? !wordBoundary : wordBoundary;
422     }
423
424     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
425     {
426         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
427
428         switch (term.atom.quantityType) {
429         case QuantifierFixedCount:
430             break;
431
432         case QuantifierGreedy:
433             if (backTrack->matchAmount) {
434                 --backTrack->matchAmount;
435                 input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
436                 return true;
437             }
438             break;
439
440         case QuantifierNonGreedy:
441             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
442                 ++backTrack->matchAmount;
443                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
444                     return true;
445             }
446             input.setPos(backTrack->begin);
447             break;
448         }
449
450         return false;
451     }
452
453     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
454     {
455         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
456
457         switch (term.atom.quantityType) {
458         case QuantifierFixedCount:
459             break;
460
461         case QuantifierGreedy:
462             if (backTrack->matchAmount) {
463                 --backTrack->matchAmount;
464                 input.uncheckInput(1);
465                 return true;
466             }
467             break;
468
469         case QuantifierNonGreedy:
470             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
471                 ++backTrack->matchAmount;
472                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
473                     return true;
474             }
475             input.uncheckInput(backTrack->matchAmount);
476             break;
477         }
478
479         return false;
480     }
481
482     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
483     {
484         ASSERT(term.type == ByteTerm::TypeCharacterClass);
485         BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
486
487         switch (term.atom.quantityType) {
488         case QuantifierFixedCount: {
489             if (unicode) {
490                 backTrack->begin = input.getPos();
491                 unsigned matchAmount = 0;
492                 for (matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
493                     if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) {
494                         input.setPos(backTrack->begin);
495                         return false;
496                     }
497                 }
498
499                 return true;
500             }
501
502             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
503                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
504                     return false;
505             }
506             return true;
507         }
508
509         case QuantifierGreedy: {
510             unsigned position = input.getPos();
511             backTrack->begin = position;
512             unsigned matchAmount = 0;
513             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
514                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
515                     input.setPos(position);
516                     break;
517                 }
518                 ++matchAmount;
519                 position = input.getPos();
520             }
521             backTrack->matchAmount = matchAmount;
522
523             return true;
524         }
525
526         case QuantifierNonGreedy:
527             backTrack->begin = input.getPos();
528             backTrack->matchAmount = 0;
529             return true;
530         }
531
532         RELEASE_ASSERT_NOT_REACHED();
533         return false;
534     }
535
536     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
537     {
538         ASSERT(term.type == ByteTerm::TypeCharacterClass);
539         BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
540
541         switch (term.atom.quantityType) {
542         case QuantifierFixedCount:
543             if (unicode)
544                 input.setPos(backTrack->begin);
545             break;
546
547         case QuantifierGreedy:
548             if (backTrack->matchAmount) {
549                 if (unicode) {
550                     // Rematch one less match
551                     input.setPos(backTrack->begin);
552                     --backTrack->matchAmount;
553                     for (unsigned matchAmount = 0; (matchAmount < backTrack->matchAmount) && input.checkInput(1); ++matchAmount) {
554                         if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
555                             input.uncheckInput(1);
556                             break;
557                         }
558                     }
559                     return true;
560                 }
561                 --backTrack->matchAmount;
562                 input.uncheckInput(1);
563                 return true;
564             }
565             break;
566
567         case QuantifierNonGreedy:
568             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
569                 ++backTrack->matchAmount;
570                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
571                     return true;
572             }
573             input.setPos(backTrack->begin);
574             break;
575         }
576
577         return false;
578     }
579
580     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
581     {
582         ASSERT(term.type == ByteTerm::TypeBackReference);
583         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
584
585         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
586         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
587
588         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
589         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
590         // Eg.: /(a\1)/
591         if (matchEnd == offsetNoMatch)
592             return true;
593
594         if (matchBegin == offsetNoMatch)
595             return true;
596
597         ASSERT(matchBegin <= matchEnd);
598
599         if (matchBegin == matchEnd)
600             return true;
601
602         switch (term.atom.quantityType) {
603         case QuantifierFixedCount: {
604             backTrack->begin = input.getPos();
605             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
606                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
607                     input.setPos(backTrack->begin);
608                     return false;
609                 }
610             }
611             return true;
612         }
613
614         case QuantifierGreedy: {
615             unsigned matchAmount = 0;
616             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
617                 ++matchAmount;
618             backTrack->matchAmount = matchAmount;
619             return true;
620         }
621
622         case QuantifierNonGreedy:
623             backTrack->begin = input.getPos();
624             backTrack->matchAmount = 0;
625             return true;
626         }
627
628         RELEASE_ASSERT_NOT_REACHED();
629         return false;
630     }
631
632     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
633     {
634         ASSERT(term.type == ByteTerm::TypeBackReference);
635         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
636
637         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
638         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
639
640         if (matchBegin == offsetNoMatch)
641             return false;
642
643         ASSERT(matchBegin <= matchEnd);
644
645         if (matchBegin == matchEnd)
646             return false;
647
648         switch (term.atom.quantityType) {
649         case QuantifierFixedCount:
650             // for quantityCount == 1, could rewind.
651             input.setPos(backTrack->begin);
652             break;
653
654         case QuantifierGreedy:
655             if (backTrack->matchAmount) {
656                 --backTrack->matchAmount;
657                 input.rewind(matchEnd - matchBegin);
658                 return true;
659             }
660             break;
661
662         case QuantifierNonGreedy:
663             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
664                 ++backTrack->matchAmount;
665                 return true;
666             }
667             input.setPos(backTrack->begin);
668             break;
669         }
670
671         return false;
672     }
673
674     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
675     {
676         if (term.capture()) {
677             unsigned subpatternId = term.atom.subpatternId;
678             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
679             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
680         }
681     }
682     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
683     {
684         unsigned firstSubpatternId = term.atom.subpatternId;
685         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
686         context->restoreOutput(output, firstSubpatternId, count);
687     }
688     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
689     {
690         while (backTrack->matchAmount) {
691             ParenthesesDisjunctionContext* context = backTrack->lastContext;
692
693             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
694             if (result == JSRegExpMatch)
695                 return JSRegExpMatch;
696
697             resetMatches(term, context);
698             popParenthesesDisjunctionContext(backTrack);
699             freeParenthesesDisjunctionContext(context);
700
701             if (result != JSRegExpNoMatch)
702                 return result;
703         }
704
705         return JSRegExpNoMatch;
706     }
707
708     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
709     {
710         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
711         ASSERT(term.atom.quantityCount == 1);
712
713         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
714
715         switch (term.atom.quantityType) {
716         case QuantifierGreedy: {
717             // set this speculatively; if we get to the parens end this will be true.
718             backTrack->begin = input.getPos();
719             break;
720         }
721         case QuantifierNonGreedy: {
722             backTrack->begin = notFound;
723             context->term += term.atom.parenthesesWidth;
724             return true;
725         }
726         case QuantifierFixedCount:
727             break;
728         }
729
730         if (term.capture()) {
731             unsigned subpatternId = term.atom.subpatternId;
732             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
733         }
734
735         return true;
736     }
737
738     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
739     {
740         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
741         ASSERT(term.atom.quantityCount == 1);
742
743         if (term.capture()) {
744             unsigned subpatternId = term.atom.subpatternId;
745             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
746         }
747
748         if (term.atom.quantityType == QuantifierFixedCount)
749             return true;
750
751         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
752         return backTrack->begin != input.getPos();
753     }
754
755     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
756     {
757         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
758         ASSERT(term.atom.quantityCount == 1);
759
760         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
761
762         if (term.capture()) {
763             unsigned subpatternId = term.atom.subpatternId;
764             output[(subpatternId << 1)] = offsetNoMatch;
765             output[(subpatternId << 1) + 1] = offsetNoMatch;
766         }
767
768         switch (term.atom.quantityType) {
769         case QuantifierGreedy:
770             // if we backtrack to this point, there is another chance - try matching nothing.
771             ASSERT(backTrack->begin != notFound);
772             backTrack->begin = notFound;
773             context->term += term.atom.parenthesesWidth;
774             return true;
775         case QuantifierNonGreedy:
776             ASSERT(backTrack->begin != notFound);
777             FALLTHROUGH;
778         case QuantifierFixedCount:
779             break;
780         }
781
782         return false;
783     }
784
785     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
786     {
787         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
788         ASSERT(term.atom.quantityCount == 1);
789
790         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
791
792         switch (term.atom.quantityType) {
793         case QuantifierGreedy:
794             if (backTrack->begin == notFound) {
795                 context->term -= term.atom.parenthesesWidth;
796                 return false;
797             }
798             FALLTHROUGH;
799         case QuantifierNonGreedy:
800             if (backTrack->begin == notFound) {
801                 backTrack->begin = input.getPos();
802                 if (term.capture()) {
803                     // Technically this access to inputPosition should be accessing the begin term's
804                     // inputPosition, but for repeats other than fixed these values should be
805                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
806                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
807                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
808                     unsigned subpatternId = term.atom.subpatternId;
809                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
810                 }
811                 context->term -= term.atom.parenthesesWidth;
812                 return true;
813             }
814             FALLTHROUGH;
815         case QuantifierFixedCount:
816             break;
817         }
818
819         return false;
820     }
821
822     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
823     {
824         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
825         ASSERT(term.atom.quantityType == QuantifierGreedy);
826         ASSERT(term.atom.quantityCount == quantifyInfinite);
827         ASSERT(!term.capture());
828
829         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
830         backTrack->begin = input.getPos();
831         return true;
832     }
833
834     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
835     {
836         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
837
838         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
839         // Empty match is a failed match.
840         if (backTrack->begin == input.getPos())
841             return false;
842
843         // Successful match! Okay, what's next? - loop around and try to match more!
844         context->term -= (term.atom.parenthesesWidth + 1);
845         return true;
846     }
847
848     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
849     {
850         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
851         ASSERT(term.atom.quantityType == QuantifierGreedy);
852         ASSERT(term.atom.quantityCount == quantifyInfinite);
853         ASSERT(!term.capture());
854
855         // If we backtrack to this point, we have failed to match this iteration of the parens.
856         // Since this is greedy / zero minimum a failed is also accepted as a match!
857         context->term += term.atom.parenthesesWidth;
858         return true;
859     }
860
861     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
862     {
863         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
864         // should always be returned as a successful match - we should never backtrack to here.
865         RELEASE_ASSERT_NOT_REACHED();
866         return false;
867     }
868
869     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
870     {
871         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
872         ASSERT(term.atom.quantityCount == 1);
873
874         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
875
876         backTrack->begin = input.getPos();
877         return true;
878     }
879
880     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
881     {
882         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
883         ASSERT(term.atom.quantityCount == 1);
884
885         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
886
887         input.setPos(backTrack->begin);
888
889         // We've reached the end of the parens; if they are inverted, this is failure.
890         if (term.invert()) {
891             context->term -= term.atom.parenthesesWidth;
892             return false;
893         }
894
895         return true;
896     }
897
898     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
899     {
900         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
901         ASSERT(term.atom.quantityCount == 1);
902
903         // We've failed to match parens; if they are inverted, this is win!
904         if (term.invert()) {
905             context->term += term.atom.parenthesesWidth;
906             return true;
907         }
908
909         return false;
910     }
911
912     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
913     {
914         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
915         ASSERT(term.atom.quantityCount == 1);
916
917         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
918
919         input.setPos(backTrack->begin);
920
921         context->term -= term.atom.parenthesesWidth;
922         return false;
923     }
924
925     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
926     {
927         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
928
929         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
930         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
931
932         backTrack->matchAmount = 0;
933         backTrack->lastContext = 0;
934
935         switch (term.atom.quantityType) {
936         case QuantifierFixedCount: {
937             // While we haven't yet reached our fixed limit,
938             while (backTrack->matchAmount < term.atom.quantityCount) {
939                 // Try to do a match, and it it succeeds, add it to the list.
940                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
941                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
942                 if (result == JSRegExpMatch)
943                     appendParenthesesDisjunctionContext(backTrack, context);
944                 else {
945                     // The match failed; try to find an alternate point to carry on from.
946                     resetMatches(term, context);
947                     freeParenthesesDisjunctionContext(context);
948
949                     if (result != JSRegExpNoMatch)
950                         return result;
951                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
952                     if (backtrackResult != JSRegExpMatch)
953                         return backtrackResult;
954                 }
955             }
956
957             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
958             ParenthesesDisjunctionContext* context = backTrack->lastContext;
959             recordParenthesesMatch(term, context);
960             return JSRegExpMatch;
961         }
962
963         case QuantifierGreedy: {
964             while (backTrack->matchAmount < term.atom.quantityCount) {
965                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
966                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
967                 if (result == JSRegExpMatch)
968                     appendParenthesesDisjunctionContext(backTrack, context);
969                 else {
970                     resetMatches(term, context);
971                     freeParenthesesDisjunctionContext(context);
972
973                     if (result != JSRegExpNoMatch)
974                         return result;
975
976                     break;
977                 }
978             }
979
980             if (backTrack->matchAmount) {
981                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
982                 recordParenthesesMatch(term, context);
983             }
984             return JSRegExpMatch;
985         }
986
987         case QuantifierNonGreedy:
988             return JSRegExpMatch;
989         }
990
991         RELEASE_ASSERT_NOT_REACHED();
992         return JSRegExpErrorNoMatch;
993     }
994
995     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
996     //
997     // Greedy matches never should try just adding more - you should already have done
998     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
999     // you backtrack an item off the list needs checking, since we'll never have matched
1000     // the one less case.  Tracking forwards, still add as much as possible.
1001     //
1002     // Non-greedy, we've already done the one less case, so don't match on popping.
1003     // We haven't done the one more case, so always try to add that.
1004     //
1005     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
1006     {
1007         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1008
1009         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1010         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1011
1012         switch (term.atom.quantityType) {
1013         case QuantifierFixedCount: {
1014             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
1015
1016             ParenthesesDisjunctionContext* context = 0;
1017             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
1018
1019             if (result != JSRegExpMatch)
1020                 return result;
1021
1022             // While we haven't yet reached our fixed limit,
1023             while (backTrack->matchAmount < term.atom.quantityCount) {
1024                 // Try to do a match, and it it succeeds, add it to the list.
1025                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1026                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1027
1028                 if (result == JSRegExpMatch)
1029                     appendParenthesesDisjunctionContext(backTrack, context);
1030                 else {
1031                     // The match failed; try to find an alternate point to carry on from.
1032                     resetMatches(term, context);
1033                     freeParenthesesDisjunctionContext(context);
1034
1035                     if (result != JSRegExpNoMatch)
1036                         return result;
1037                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1038                     if (backtrackResult != JSRegExpMatch)
1039                         return backtrackResult;
1040                 }
1041             }
1042
1043             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
1044             context = backTrack->lastContext;
1045             recordParenthesesMatch(term, context);
1046             return JSRegExpMatch;
1047         }
1048
1049         case QuantifierGreedy: {
1050             if (!backTrack->matchAmount)
1051                 return JSRegExpNoMatch;
1052
1053             ParenthesesDisjunctionContext* context = backTrack->lastContext;
1054             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1055             if (result == JSRegExpMatch) {
1056                 while (backTrack->matchAmount < term.atom.quantityCount) {
1057                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1058                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1059                     if (parenthesesResult == JSRegExpMatch)
1060                         appendParenthesesDisjunctionContext(backTrack, context);
1061                     else {
1062                         resetMatches(term, context);
1063                         freeParenthesesDisjunctionContext(context);
1064
1065                         if (parenthesesResult != JSRegExpNoMatch)
1066                             return parenthesesResult;
1067
1068                         break;
1069                     }
1070                 }
1071             } else {
1072                 resetMatches(term, context);
1073                 popParenthesesDisjunctionContext(backTrack);
1074                 freeParenthesesDisjunctionContext(context);
1075
1076                 if (result != JSRegExpNoMatch)
1077                     return result;
1078             }
1079
1080             if (backTrack->matchAmount) {
1081                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1082                 recordParenthesesMatch(term, context);
1083             }
1084             return JSRegExpMatch;
1085         }
1086
1087         case QuantifierNonGreedy: {
1088             // If we've not reached the limit, try to add one more match.
1089             if (backTrack->matchAmount < term.atom.quantityCount) {
1090                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1091                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1092                 if (result == JSRegExpMatch) {
1093                     appendParenthesesDisjunctionContext(backTrack, context);
1094                     recordParenthesesMatch(term, context);
1095                     return JSRegExpMatch;
1096                 }
1097
1098                 resetMatches(term, context);
1099                 freeParenthesesDisjunctionContext(context);
1100
1101                 if (result != JSRegExpNoMatch)
1102                     return result;
1103             }
1104
1105             // Nope - okay backtrack looking for an alternative.
1106             while (backTrack->matchAmount) {
1107                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1108                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1109                 if (result == JSRegExpMatch) {
1110                     // successful backtrack! we're back in the game!
1111                     if (backTrack->matchAmount) {
1112                         context = backTrack->lastContext;
1113                         recordParenthesesMatch(term, context);
1114                     }
1115                     return JSRegExpMatch;
1116                 }
1117
1118                 // pop a match off the stack
1119                 resetMatches(term, context);
1120                 popParenthesesDisjunctionContext(backTrack);
1121                 freeParenthesesDisjunctionContext(context);
1122
1123                 if (result != JSRegExpNoMatch)
1124                     return result;
1125             }
1126
1127             return JSRegExpNoMatch;
1128         }
1129         }
1130
1131         RELEASE_ASSERT_NOT_REACHED();
1132         return JSRegExpErrorNoMatch;
1133     }
1134
1135     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1136     {
1137         UNUSED_PARAM(term);
1138         unsigned matchBegin = context->matchBegin;
1139
1140         if (matchBegin) {
1141             for (matchBegin--; true; matchBegin--) {
1142                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1143                     ++matchBegin;
1144                     break;
1145                 }
1146
1147                 if (!matchBegin)
1148                     break;
1149             }
1150         }
1151
1152         unsigned matchEnd = input.getPos();
1153
1154         for (; (matchEnd != input.end())
1155              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1156
1157         if (((matchBegin && term.anchors.m_bol)
1158              || ((matchEnd != input.end()) && term.anchors.m_eol))
1159             && !pattern->multiline())
1160             return false;
1161
1162         context->matchBegin = matchBegin;
1163         context->matchEnd = matchEnd;
1164         return true;
1165     }
1166
1167 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1168 #define BACKTRACK() { --context->term; goto backtrack; }
1169 #define currentTerm() (disjunction->terms[context->term])
1170     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1171     {
1172         if (!--remainingMatchCount)
1173             return JSRegExpErrorHitLimit;
1174
1175         if (btrack)
1176             BACKTRACK();
1177
1178         context->matchBegin = input.getPos();
1179         context->term = 0;
1180
1181     matchAgain:
1182         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1183
1184         switch (currentTerm().type) {
1185         case ByteTerm::TypeSubpatternBegin:
1186             MATCH_NEXT();
1187         case ByteTerm::TypeSubpatternEnd:
1188             context->matchEnd = input.getPos();
1189             return JSRegExpMatch;
1190
1191         case ByteTerm::TypeBodyAlternativeBegin:
1192             MATCH_NEXT();
1193         case ByteTerm::TypeBodyAlternativeDisjunction:
1194         case ByteTerm::TypeBodyAlternativeEnd:
1195             context->matchEnd = input.getPos();
1196             return JSRegExpMatch;
1197
1198         case ByteTerm::TypeAlternativeBegin:
1199             MATCH_NEXT();
1200         case ByteTerm::TypeAlternativeDisjunction:
1201         case ByteTerm::TypeAlternativeEnd: {
1202             int offset = currentTerm().alternative.end;
1203             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1204             backTrack->offset = offset;
1205             context->term += offset;
1206             MATCH_NEXT();
1207         }
1208
1209         case ByteTerm::TypeAssertionBOL:
1210             if (matchAssertionBOL(currentTerm()))
1211                 MATCH_NEXT();
1212             BACKTRACK();
1213         case ByteTerm::TypeAssertionEOL:
1214             if (matchAssertionEOL(currentTerm()))
1215                 MATCH_NEXT();
1216             BACKTRACK();
1217         case ByteTerm::TypeAssertionWordBoundary:
1218             if (matchAssertionWordBoundary(currentTerm()))
1219                 MATCH_NEXT();
1220             BACKTRACK();
1221
1222         case ByteTerm::TypePatternCharacterOnce:
1223         case ByteTerm::TypePatternCharacterFixed: {
1224             if (unicode) {
1225                 if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
1226                     for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1227                         if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 2 * matchAmount)) {
1228                             BACKTRACK();
1229                         }
1230                     }
1231                     MATCH_NEXT();
1232                 }
1233             }
1234             unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1235
1236             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1237                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
1238                     input.setPos(position);
1239                     BACKTRACK();
1240                 }
1241             }
1242             MATCH_NEXT();
1243         }
1244         case ByteTerm::TypePatternCharacterGreedy: {
1245             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1246             unsigned matchAmount = 0;
1247             unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1248             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1249                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1250                     input.setPos(position);
1251                     break;
1252                 }
1253                 ++matchAmount;
1254                 position = input.getPos();
1255             }
1256             backTrack->matchAmount = matchAmount;
1257
1258             MATCH_NEXT();
1259         }
1260         case ByteTerm::TypePatternCharacterNonGreedy: {
1261             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1262             backTrack->begin = input.getPos();
1263             backTrack->matchAmount = 0;
1264             MATCH_NEXT();
1265         }
1266
1267         case ByteTerm::TypePatternCasedCharacterOnce:
1268         case ByteTerm::TypePatternCasedCharacterFixed: {
1269             if (unicode) {
1270                 // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1271                 ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
1272
1273                 unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1274                 
1275                 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1276                     if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
1277                         input.setPos(position);
1278                         BACKTRACK();
1279                     }
1280                 }
1281                 MATCH_NEXT();
1282             }
1283
1284             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1285                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1286                     BACKTRACK();
1287             }
1288             MATCH_NEXT();
1289         }
1290         case ByteTerm::TypePatternCasedCharacterGreedy: {
1291             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1292
1293             // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1294             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
1295
1296             unsigned matchAmount = 0;
1297             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1298                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1299                     input.uncheckInput(1);
1300                     break;
1301                 }
1302                 ++matchAmount;
1303             }
1304             backTrack->matchAmount = matchAmount;
1305
1306             MATCH_NEXT();
1307         }
1308         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1309             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1310
1311             // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1312             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
1313             
1314             backTrack->matchAmount = 0;
1315             MATCH_NEXT();
1316         }
1317
1318         case ByteTerm::TypeCharacterClass:
1319             if (matchCharacterClass(currentTerm(), context))
1320                 MATCH_NEXT();
1321             BACKTRACK();
1322         case ByteTerm::TypeBackReference:
1323             if (matchBackReference(currentTerm(), context))
1324                 MATCH_NEXT();
1325             BACKTRACK();
1326         case ByteTerm::TypeParenthesesSubpattern: {
1327             JSRegExpResult result = matchParentheses(currentTerm(), context);
1328
1329             if (result == JSRegExpMatch) {
1330                 MATCH_NEXT();
1331             }  else if (result != JSRegExpNoMatch)
1332                 return result;
1333
1334             BACKTRACK();
1335         }
1336         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1337             if (matchParenthesesOnceBegin(currentTerm(), context))
1338                 MATCH_NEXT();
1339             BACKTRACK();
1340         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1341             if (matchParenthesesOnceEnd(currentTerm(), context))
1342                 MATCH_NEXT();
1343             BACKTRACK();
1344         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1345             if (matchParenthesesTerminalBegin(currentTerm(), context))
1346                 MATCH_NEXT();
1347             BACKTRACK();
1348         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1349             if (matchParenthesesTerminalEnd(currentTerm(), context))
1350                 MATCH_NEXT();
1351             BACKTRACK();
1352         case ByteTerm::TypeParentheticalAssertionBegin:
1353             if (matchParentheticalAssertionBegin(currentTerm(), context))
1354                 MATCH_NEXT();
1355             BACKTRACK();
1356         case ByteTerm::TypeParentheticalAssertionEnd:
1357             if (matchParentheticalAssertionEnd(currentTerm(), context))
1358                 MATCH_NEXT();
1359             BACKTRACK();
1360
1361         case ByteTerm::TypeCheckInput:
1362             if (input.checkInput(currentTerm().checkInputCount))
1363                 MATCH_NEXT();
1364             BACKTRACK();
1365
1366         case ByteTerm::TypeUncheckInput:
1367             input.uncheckInput(currentTerm().checkInputCount);
1368             MATCH_NEXT();
1369                 
1370         case ByteTerm::TypeDotStarEnclosure:
1371             if (matchDotStarEnclosure(currentTerm(), context))
1372                 return JSRegExpMatch;
1373             BACKTRACK();
1374         }
1375
1376         // We should never fall-through to here.
1377         RELEASE_ASSERT_NOT_REACHED();
1378
1379     backtrack:
1380         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1381
1382         switch (currentTerm().type) {
1383         case ByteTerm::TypeSubpatternBegin:
1384             return JSRegExpNoMatch;
1385         case ByteTerm::TypeSubpatternEnd:
1386             RELEASE_ASSERT_NOT_REACHED();
1387
1388         case ByteTerm::TypeBodyAlternativeBegin:
1389         case ByteTerm::TypeBodyAlternativeDisjunction: {
1390             int offset = currentTerm().alternative.next;
1391             context->term += offset;
1392             if (offset > 0)
1393                 MATCH_NEXT();
1394
1395             if (input.atEnd() || pattern->sticky())
1396                 return JSRegExpNoMatch;
1397
1398             input.next();
1399
1400             context->matchBegin = input.getPos();
1401
1402             if (currentTerm().alternative.onceThrough)
1403                 context->term += currentTerm().alternative.next;
1404
1405             MATCH_NEXT();
1406         }
1407         case ByteTerm::TypeBodyAlternativeEnd:
1408             RELEASE_ASSERT_NOT_REACHED();
1409
1410         case ByteTerm::TypeAlternativeBegin:
1411         case ByteTerm::TypeAlternativeDisjunction: {
1412             int offset = currentTerm().alternative.next;
1413             context->term += offset;
1414             if (offset > 0)
1415                 MATCH_NEXT();
1416             BACKTRACK();
1417         }
1418         case ByteTerm::TypeAlternativeEnd: {
1419             // We should never backtrack back into an alternative of the main body of the regex.
1420             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1421             unsigned offset = backTrack->offset;
1422             context->term -= offset;
1423             BACKTRACK();
1424         }
1425
1426         case ByteTerm::TypeAssertionBOL:
1427         case ByteTerm::TypeAssertionEOL:
1428         case ByteTerm::TypeAssertionWordBoundary:
1429             BACKTRACK();
1430
1431         case ByteTerm::TypePatternCharacterOnce:
1432         case ByteTerm::TypePatternCharacterFixed:
1433         case ByteTerm::TypePatternCharacterGreedy:
1434         case ByteTerm::TypePatternCharacterNonGreedy:
1435             if (backtrackPatternCharacter(currentTerm(), context))
1436                 MATCH_NEXT();
1437             BACKTRACK();
1438         case ByteTerm::TypePatternCasedCharacterOnce:
1439         case ByteTerm::TypePatternCasedCharacterFixed:
1440         case ByteTerm::TypePatternCasedCharacterGreedy:
1441         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1442             if (backtrackPatternCasedCharacter(currentTerm(), context))
1443                 MATCH_NEXT();
1444             BACKTRACK();
1445         case ByteTerm::TypeCharacterClass:
1446             if (backtrackCharacterClass(currentTerm(), context))
1447                 MATCH_NEXT();
1448             BACKTRACK();
1449         case ByteTerm::TypeBackReference:
1450             if (backtrackBackReference(currentTerm(), context))
1451                 MATCH_NEXT();
1452             BACKTRACK();
1453         case ByteTerm::TypeParenthesesSubpattern: {
1454             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1455
1456             if (result == JSRegExpMatch) {
1457                 MATCH_NEXT();
1458             } else if (result != JSRegExpNoMatch)
1459                 return result;
1460
1461             BACKTRACK();
1462         }
1463         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1464             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1465                 MATCH_NEXT();
1466             BACKTRACK();
1467         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1468             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1469                 MATCH_NEXT();
1470             BACKTRACK();
1471         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1472             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1473                 MATCH_NEXT();
1474             BACKTRACK();
1475         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1476             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1477                 MATCH_NEXT();
1478             BACKTRACK();
1479         case ByteTerm::TypeParentheticalAssertionBegin:
1480             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1481                 MATCH_NEXT();
1482             BACKTRACK();
1483         case ByteTerm::TypeParentheticalAssertionEnd:
1484             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1485                 MATCH_NEXT();
1486             BACKTRACK();
1487
1488         case ByteTerm::TypeCheckInput:
1489             input.uncheckInput(currentTerm().checkInputCount);
1490             BACKTRACK();
1491
1492         case ByteTerm::TypeUncheckInput:
1493             input.checkInput(currentTerm().checkInputCount);
1494             BACKTRACK();
1495
1496         case ByteTerm::TypeDotStarEnclosure:
1497             RELEASE_ASSERT_NOT_REACHED();
1498         }
1499
1500         RELEASE_ASSERT_NOT_REACHED();
1501         return JSRegExpErrorNoMatch;
1502     }
1503
1504     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1505     {
1506         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1507
1508         if (result == JSRegExpMatch) {
1509             while (context->matchBegin == context->matchEnd) {
1510                 result = matchDisjunction(disjunction, context, true);
1511                 if (result != JSRegExpMatch)
1512                     return result;
1513             }
1514             return JSRegExpMatch;
1515         }
1516
1517         return result;
1518     }
1519
1520     unsigned interpret()
1521     {
1522         if (!input.isAvailableInput(0))
1523             return offsetNoMatch;
1524
1525         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1526             output[i << 1] = offsetNoMatch;
1527
1528         allocatorPool = pattern->m_allocator->startAllocator();
1529         RELEASE_ASSERT(allocatorPool);
1530
1531         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1532
1533         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1534         if (result == JSRegExpMatch) {
1535             output[0] = context->matchBegin;
1536             output[1] = context->matchEnd;
1537         }
1538
1539         freeDisjunctionContext(context);
1540
1541         pattern->m_allocator->stopAllocator();
1542
1543         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1544         return output[0];
1545     }
1546
1547     Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1548         : pattern(pattern)
1549         , unicode(pattern->unicode())
1550         , output(output)
1551         , input(input, start, length, pattern->unicode())
1552         , allocatorPool(0)
1553         , remainingMatchCount(matchLimit)
1554     {
1555     }
1556
1557 private:
1558     BytecodePattern* pattern;
1559     bool unicode;
1560     unsigned* output;
1561     InputStream input;
1562     BumpPointerPool* allocatorPool;
1563     unsigned remainingMatchCount;
1564 };
1565
1566 class ByteCompiler {
1567     struct ParenthesesStackEntry {
1568         unsigned beginTerm;
1569         unsigned savedAlternativeIndex;
1570         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1571             : beginTerm(beginTerm)
1572             , savedAlternativeIndex(savedAlternativeIndex)
1573         {
1574         }
1575     };
1576
1577 public:
1578     ByteCompiler(YarrPattern& pattern)
1579         : m_pattern(pattern)
1580     {
1581         m_currentAlternativeIndex = 0;
1582     }
1583
1584     std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1585     {
1586         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1587         emitDisjunction(m_pattern.m_body);
1588         regexEnd();
1589
1590         return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator);
1591     }
1592
1593     void checkInput(unsigned count)
1594     {
1595         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1596     }
1597
1598     void uncheckInput(unsigned count)
1599     {
1600         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1601     }
1602     
1603     void assertionBOL(unsigned inputPosition)
1604     {
1605         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1606     }
1607
1608     void assertionEOL(unsigned inputPosition)
1609     {
1610         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1611     }
1612
1613     void assertionWordBoundary(bool invert, unsigned inputPosition)
1614     {
1615         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1616     }
1617
1618     void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1619     {
1620         if (m_pattern.ignoreCase()) {
1621             UChar32 lo = u_tolower(ch);
1622             UChar32 hi = u_toupper(ch);
1623
1624             if (lo != hi) {
1625                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1626                 return;
1627             }
1628         }
1629
1630         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1631     }
1632
1633     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1634     {
1635         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1636
1637         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1638         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1639         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1640     }
1641
1642     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1643     {
1644         ASSERT(subpatternId);
1645
1646         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1647
1648         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1649         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1650         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1651     }
1652
1653     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1654     {
1655         int beginTerm = m_bodyDisjunction->terms.size();
1656
1657         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1658         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1659         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1660         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1661
1662         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1663         m_currentAlternativeIndex = beginTerm + 1;
1664     }
1665
1666     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1667     {
1668         int beginTerm = m_bodyDisjunction->terms.size();
1669
1670         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1671         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1672         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1673         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1674
1675         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1676         m_currentAlternativeIndex = beginTerm + 1;
1677     }
1678
1679     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1680     {
1681         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1682         // then fix this up at the end! - simplifying this should make it much clearer.
1683         // https://bugs.webkit.org/show_bug.cgi?id=50136
1684
1685         int beginTerm = m_bodyDisjunction->terms.size();
1686
1687         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1688         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1689         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1690         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1691
1692         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1693         m_currentAlternativeIndex = beginTerm + 1;
1694     }
1695
1696     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1697     {
1698         int beginTerm = m_bodyDisjunction->terms.size();
1699
1700         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1701         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1702         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1703         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1704
1705         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1706         m_currentAlternativeIndex = beginTerm + 1;
1707     }
1708
1709     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1710     {
1711         unsigned beginTerm = popParenthesesStack();
1712         closeAlternative(beginTerm + 1);
1713         unsigned endTerm = m_bodyDisjunction->terms.size();
1714
1715         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1716
1717         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1718         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1719
1720         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1721         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1722         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1723         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1724
1725         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1726         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1727         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1728         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1729     }
1730
1731     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1732     {
1733         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1734     }
1735
1736     unsigned popParenthesesStack()
1737     {
1738         ASSERT(m_parenthesesStack.size());
1739         int stackEnd = m_parenthesesStack.size() - 1;
1740         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1741         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1742         m_parenthesesStack.shrink(stackEnd);
1743
1744         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1745         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1746
1747         return beginTerm;
1748     }
1749
1750 #ifndef NDEBUG
1751     void dumpDisjunction(ByteDisjunction* disjunction)
1752     {
1753         dataLogF("ByteDisjunction(%p):\n\t", disjunction);
1754         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1755             dataLogF("{ %d } ", disjunction->terms[i].type);
1756         dataLogF("\n");
1757     }
1758 #endif
1759
1760     void closeAlternative(int beginTerm)
1761     {
1762         int origBeginTerm = beginTerm;
1763         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1764         int endIndex = m_bodyDisjunction->terms.size();
1765
1766         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1767
1768         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1769             m_bodyDisjunction->terms.remove(beginTerm);
1770         else {
1771             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1772                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1773                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1774                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1775                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1776             }
1777
1778             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1779
1780             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1781             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1782         }
1783     }
1784
1785     void closeBodyAlternative()
1786     {
1787         int beginTerm = 0;
1788         int origBeginTerm = 0;
1789         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1790         int endIndex = m_bodyDisjunction->terms.size();
1791
1792         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1793
1794         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1795             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1796             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1797             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1798             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1799         }
1800
1801         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1802
1803         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1804         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1805     }
1806
1807     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1808     {
1809         unsigned beginTerm = popParenthesesStack();
1810         closeAlternative(beginTerm + 1);
1811         unsigned endTerm = m_bodyDisjunction->terms.size();
1812
1813         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1814
1815         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1816
1817         bool capture = parenthesesBegin.capture();
1818         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1819
1820         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1821         auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
1822
1823         unsigned firstTermInParentheses = beginTerm + 1;
1824         parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
1825
1826         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1827         for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1828             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1829         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1830
1831         m_bodyDisjunction->terms.shrink(beginTerm);
1832
1833         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1834         m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
1835
1836         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1837         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1838         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1839     }
1840
1841     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1842     {
1843         unsigned beginTerm = popParenthesesStack();
1844         closeAlternative(beginTerm + 1);
1845         unsigned endTerm = m_bodyDisjunction->terms.size();
1846
1847         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1848
1849         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1850         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1851
1852         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1853         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1854         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1855         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1856
1857         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1858         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1859         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1860         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1861     }
1862
1863     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1864     {
1865         unsigned beginTerm = popParenthesesStack();
1866         closeAlternative(beginTerm + 1);
1867         unsigned endTerm = m_bodyDisjunction->terms.size();
1868
1869         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1870
1871         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1872         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1873
1874         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1875         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1876         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1877         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1878
1879         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1880         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1881         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1882         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1883     }
1884
1885     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1886     {
1887         m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
1888         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1889         m_bodyDisjunction->terms[0].frameLocation = 0;
1890         m_currentAlternativeIndex = 0;
1891     }
1892
1893     void regexEnd()
1894     {
1895         closeBodyAlternative();
1896     }
1897
1898     void alternativeBodyDisjunction(bool onceThrough)
1899     {
1900         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1901         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1902         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1903
1904         m_currentAlternativeIndex = newAlternativeIndex;
1905     }
1906
1907     void alternativeDisjunction()
1908     {
1909         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1910         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1911         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1912
1913         m_currentAlternativeIndex = newAlternativeIndex;
1914     }
1915
1916     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1917     {
1918         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1919             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1920
1921             PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
1922
1923             if (alt) {
1924                 if (disjunction == m_pattern.m_body)
1925                     alternativeBodyDisjunction(alternative->onceThrough());
1926                 else
1927                     alternativeDisjunction();
1928             }
1929
1930             unsigned minimumSize = alternative->m_minimumSize;
1931             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1932             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1933
1934             if (countToCheck) {
1935                 checkInput(countToCheck);
1936                 currentCountAlreadyChecked += countToCheck;
1937             }
1938
1939             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1940                 PatternTerm& term = alternative->m_terms[i];
1941
1942                 switch (term.type) {
1943                 case PatternTerm::TypeAssertionBOL:
1944                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
1945                     break;
1946
1947                 case PatternTerm::TypeAssertionEOL:
1948                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
1949                     break;
1950
1951                 case PatternTerm::TypeAssertionWordBoundary:
1952                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
1953                     break;
1954
1955                 case PatternTerm::TypePatternCharacter:
1956                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1957                     break;
1958
1959                 case PatternTerm::TypeCharacterClass:
1960                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1961                     break;
1962
1963                 case PatternTerm::TypeBackReference:
1964                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1965                         break;
1966
1967                 case PatternTerm::TypeForwardReference:
1968                     break;
1969
1970                 case PatternTerm::TypeParenthesesSubpattern: {
1971                     unsigned disjunctionAlreadyCheckedCount = 0;
1972                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1973                         unsigned alternativeFrameLocation = term.frameLocation;
1974                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1975                         if (term.quantityType == QuantifierFixedCount)
1976                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1977                         else
1978                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1979                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1980                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
1981                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1982                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1983                     } else if (term.parentheses.isTerminal) {
1984                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1985                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1986                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1987                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1988                     } else {
1989                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1990                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
1991                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1992                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1993                     }
1994                     break;
1995                 }
1996
1997                 case PatternTerm::TypeParentheticalAssertion: {
1998                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1999
2000                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
2001                     unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
2002                     unsigned uncheckAmount = 0;
2003                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
2004                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
2005                         uncheckInput(uncheckAmount);
2006                         currentCountAlreadyChecked -= uncheckAmount;
2007                     }
2008
2009                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
2010                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
2011                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
2012                     if (uncheckAmount) {
2013                         checkInput(uncheckAmount);
2014                         currentCountAlreadyChecked += uncheckAmount;
2015                     }
2016                     break;
2017                 }
2018
2019                 case PatternTerm::TypeDotStarEnclosure:
2020                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
2021                     break;
2022                 }
2023             }
2024         }
2025     }
2026
2027 private:
2028     YarrPattern& m_pattern;
2029     std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
2030     unsigned m_currentAlternativeIndex;
2031     Vector<ParenthesesStackEntry> m_parenthesesStack;
2032     Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
2033 };
2034
2035 std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
2036 {
2037     return ByteCompiler(pattern).compile(allocator);
2038 }
2039
2040 unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
2041 {
2042     SuperSamplerScope superSamplerScope(false);
2043     if (input.is8Bit())
2044         return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
2045     return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
2046 }
2047
2048 unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
2049 {
2050     SuperSamplerScope superSamplerScope(false);
2051     return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
2052 }
2053
2054 unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
2055 {
2056     SuperSamplerScope superSamplerScope(false);
2057     return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
2058 }
2059
2060 // These should be the same for both UChar & LChar.
2061 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
2062 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
2063 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
2064 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
2065 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
2066 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
2067 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
2068
2069
2070 } }