Enhance the MacroAssembler and LinkBuffer to support pointer profiling.
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrJIT.cpp
1 /*
2  * Copyright (C) 2009-2018 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27 #include "YarrJIT.h"
28
29 #include <wtf/ASCIICType.h>
30 #include "LinkBuffer.h"
31 #include "Options.h"
32 #include "VM.h"
33 #include "Yarr.h"
34 #include "YarrCanonicalize.h"
35
36 #if ENABLE(YARR_JIT)
37
38 using namespace WTF;
39
40 namespace JSC { namespace Yarr {
41
42 template<YarrJITCompileMode compileMode>
43 class YarrGenerator : private MacroAssembler {
44     friend void jitCompile(VM*, YarrCodeBlock&, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
45
// Per-architecture register assignments for the generated matcher. Each port
// maps the JIT entry point's arguments (input string, start index, length,
// output vector) plus scratch and return registers onto concrete machine
// registers. HAVE_INITIAL_START_REG is defined on ports that dedicate an
// 'initialStart' register; JIT_UNICODE_EXPRESSIONS is defined on ports with
// the extra registers needed for surrogate-pair decoding (see the
// JIT_UNICODE_EXPRESSIONS-guarded helpers below).
#if CPU(ARM)
    static const RegisterID input = ARMRegisters::r0;
    static const RegisterID index = ARMRegisters::r1;
    static const RegisterID length = ARMRegisters::r2;
    static const RegisterID output = ARMRegisters::r3;

    static const RegisterID regT0 = ARMRegisters::r4;
    static const RegisterID regT1 = ARMRegisters::r5;
    static const RegisterID initialStart = ARMRegisters::r8;

    static const RegisterID returnRegister = ARMRegisters::r0;
    static const RegisterID returnRegister2 = ARMRegisters::r1;

#define HAVE_INITIAL_START_REG
#elif CPU(ARM64)
    // Argument registers
    static const RegisterID input = ARM64Registers::x0;
    static const RegisterID index = ARM64Registers::x1;
    static const RegisterID length = ARM64Registers::x2;
    static const RegisterID output = ARM64Registers::x3;
    static const RegisterID freelistRegister = ARM64Registers::x4;
    static const RegisterID freelistSizeRegister = ARM64Registers::x5;

    // Scratch registers
    static const RegisterID regT0 = ARM64Registers::x6;
    static const RegisterID regT1 = ARM64Registers::x7;
    static const RegisterID regT2 = ARM64Registers::x8;
    static const RegisterID remainingMatchCount = ARM64Registers::x9;
    static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
    static const RegisterID initialStart = ARM64Registers::x11;
    // On ARM64 the surrogate-decoding constants live in registers; on X86_64
    // the same names are TrustedImm32 members (see below).
    static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
    static const RegisterID surrogateTagMask = ARM64Registers::x13;
    static const RegisterID leadingSurrogateTag = ARM64Registers::x14;
    static const RegisterID trailingSurrogateTag = ARM64Registers::x15;

    static const RegisterID returnRegister = ARM64Registers::x0;
    static const RegisterID returnRegister2 = ARM64Registers::x1;

#define HAVE_INITIAL_START_REG
#define JIT_UNICODE_EXPRESSIONS
#elif CPU(MIPS)
    static const RegisterID input = MIPSRegisters::a0;
    static const RegisterID index = MIPSRegisters::a1;
    static const RegisterID length = MIPSRegisters::a2;
    static const RegisterID output = MIPSRegisters::a3;

    static const RegisterID regT0 = MIPSRegisters::t4;
    static const RegisterID regT1 = MIPSRegisters::t5;
    static const RegisterID initialStart = MIPSRegisters::t6;

    static const RegisterID returnRegister = MIPSRegisters::v0;
    static const RegisterID returnRegister2 = MIPSRegisters::v1;

#define HAVE_INITIAL_START_REG
#elif CPU(X86)
    static const RegisterID input = X86Registers::eax;
    static const RegisterID index = X86Registers::edx;
    static const RegisterID length = X86Registers::ecx;
    static const RegisterID output = X86Registers::edi;

    static const RegisterID regT0 = X86Registers::ebx;
    static const RegisterID regT1 = X86Registers::esi;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;
#elif CPU(X86_64)
#if !OS(WINDOWS)
    // Argument registers
    static const RegisterID input = X86Registers::edi;
    static const RegisterID index = X86Registers::esi;
    static const RegisterID length = X86Registers::edx;
    static const RegisterID output = X86Registers::ecx;
    static const RegisterID freelistRegister = X86Registers::r8;
    static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
#else
    // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
    // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
    COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
    static const RegisterID input = X86Registers::edx;
    static const RegisterID index = X86Registers::r8;
    static const RegisterID length = X86Registers::r9;
    static const RegisterID output = X86Registers::r10;
#endif

    // Scratch registers
    static const RegisterID regT0 = X86Registers::eax;
#if !OS(WINDOWS)
    static const RegisterID regT1 = X86Registers::r9;
    static const RegisterID regT2 = X86Registers::r10;
#else
    static const RegisterID regT1 = X86Registers::ecx;
    static const RegisterID regT2 = X86Registers::edi;
#endif

    static const RegisterID initialStart = X86Registers::ebx;
#if !OS(WINDOWS)
    static const RegisterID remainingMatchCount = X86Registers::r12;
#else
    static const RegisterID remainingMatchCount = X86Registers::esi;
#endif
    static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
    static const RegisterID leadingSurrogateTag = X86Registers::r14;
    static const RegisterID trailingSurrogateTag = X86Registers::r15;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;

    // Surrogate-pair constants used by the unicode read helpers; immediates
    // here, registers on ARM64.
    const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
    const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
#define HAVE_INITIAL_START_REG
#define JIT_UNICODE_EXPRESSIONS
#endif
158
159 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
    // The two dimensions that determine how large a ParenContext record must
    // be: the number of subpatterns whose offsets are saved, and the number of
    // stack frame slots preserved when suspending a parenthesized group.
    struct ParenContextSizes {
        size_t m_numSubpatterns;
        size_t m_frameSlots;

        ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
            : m_numSubpatterns(numSubpatterns)
            , m_frameSlots(frameSlots)
        {
        }

        size_t numSubpatterns() { return m_numSubpatterns; }

        size_t frameSlots() { return m_frameSlots; }
    };
174
    // A variably-sized, singly-linked record that snapshots the state of a
    // parenthesized subexpression: match begin offset, match count, backtrack
    // return address, captured subpattern offsets, and saved frame slots.
    // Records are carved out of a caller-provided buffer by
    // initParenContextFreeList(). The zero-length arrays are trailing flexible
    // storage; subpatterns[] comes first and frameSlots[] starts immediately
    // after it (see savedFrameOffset()), with actual counts supplied by
    // ParenContextSizes.
    struct ParenContext {
        struct ParenContext* next;
        uint32_t begin;
        uint32_t matchAmount;
        uintptr_t returnAddress;
        struct Subpatterns {
            unsigned start;
            unsigned end;
        } subpatterns[0];
        uintptr_t frameSlots[0];

        // Total byte size of one record, including both trailing arrays.
        static size_t sizeFor(ParenContextSizes& parenContextSizes)
        {
            return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
        }

        static ptrdiff_t nextOffset()
        {
            return offsetof(ParenContext, next);
        }

        static ptrdiff_t beginOffset()
        {
            return offsetof(ParenContext, begin);
        }

        static ptrdiff_t matchAmountOffset()
        {
            return offsetof(ParenContext, matchAmount);
        }

        static ptrdiff_t returnAddressOffset()
        {
            return offsetof(ParenContext, returnAddress);
        }

        // Subpattern numbering is 1-based, hence the (subpattern - 1).
        static ptrdiff_t subpatternOffset(size_t subpattern)
        {
            return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
        }

        // Byte offset of the frameSlots[] area, which begins right after the
        // last Subpatterns entry.
        static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
        {
            return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
        }
    };
221
    // Emits code that partitions the buffer described by freelistRegister
    // (base address) and freelistSizeRegister (size in bytes) into a
    // null-terminated singly-linked free list of ParenContext records, each
    // rounded up to pointer alignment. If even a single context exceeds
    // INT16_MAX bytes, jumps straight to the abort path. Clobbers regT0,
    // regT2, and both freelist registers; an empty buffer leaves the free
    // list untouched (freelistRegister == null is handled by the consumers).
    void initParenContextFreeList()
    {
        RegisterID parenContextPointer = regT0;
        RegisterID nextParenContextPointer = regT2;

        size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);

        parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);

        // Check that the paren context is a reasonable size.
        if (parenContextSize > INT16_MAX)
            m_abortExecution.append(jump());

        Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
        move(freelistRegister, parenContextPointer);
        addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
        // Turn freelistSizeRegister into the address of the last record that
        // still fits entirely within the buffer: base + size - recordSize.
        addPtr(freelistRegister, freelistSizeRegister);
        subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);

        // Link each record to the next until the next record would start past
        // the last valid start address.
        Label loopTop(this);
        Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
        storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
        move(nextParenContextPointer, parenContextPointer);
        addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
        jump(loopTop);

        initDone.link(this);
        // Terminate the list.
        storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
        emptyFreeList.link(this);
    }
252
    // Emits code that pops the head ParenContext off the free list into
    // 'result'. Jumps to the abort path when the free list is exhausted, and
    // to the hit-match-limit path once remainingMatchCount decrements to zero.
    void allocateParenContext(RegisterID result)
    {
        m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
        sub32(TrustedImm32(1), remainingMatchCount);
        m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
        move(freelistRegister, result);
        loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
    }
261
    // Emits code that pushes the context at headPtrRegister back onto the free
    // list. newHeadPtrRegister receives the context that was linked after it
    // (the caller's new chain head). Note the load of 'next' happens before it
    // is overwritten with the old free-list head.
    void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
    {
        loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
        storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
        move(headPtrRegister, freelistRegister);
    }
268
    // Emits code that snapshots the current parenthesized-group state into the
    // ParenContext at parenContextReg: the current 'index' as the begin
    // offset, the group's match-amount and return-address frame slots, the
    // output-vector entries for subpatterns [firstSubpattern, lastSubpattern]
    // (IncludeSubpatterns only; each start is reset to the no-match sentinel
    // after saving), and every frame slot above the group's own backtrack
    // slots. Clobbers tempReg.
    void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
    {
        store32(index, Address(parenContextReg, ParenContext::beginOffset()));
        loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
        store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
        loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
        storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
        if (compileMode == IncludeSubpatterns) {
            for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
                // Each subpattern occupies a start/end pair in the output vector.
                loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
                storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
                clearSubpatternStart(subpattern);
            }
        }
        // Save the frame slots beyond this group's own backtrack info; slots are
        // stored at their absolute frame location within the context's frameSlots[].
        subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
        for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
            loadFromFrame(frameLocation, tempReg);
            storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
        }
    }
289
    // Inverse of saveParenContext(): emits code that restores 'index' (and its
    // begin frame slot), the match-amount and return-address frame slots, the
    // output-vector entries for subpatterns [firstSubpattern, lastSubpattern]
    // (IncludeSubpatterns only), and the saved frame slots from the
    // ParenContext at parenContextReg. Clobbers tempReg.
    void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
    {
        load32(Address(parenContextReg, ParenContext::beginOffset()), index);
        storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
        load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
        storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
        loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
        storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
        if (compileMode == IncludeSubpatterns) {
            for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
                loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
                storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
            }
        }
        subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
        for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
            loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
            storeToFrame(tempReg, frameLocation);
        }
    }
310 #endif
311
312     void optimizeAlternative(PatternAlternative* alternative)
313     {
314         if (!alternative->m_terms.size())
315             return;
316
317         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
318             PatternTerm& term = alternative->m_terms[i];
319             PatternTerm& nextTerm = alternative->m_terms[i + 1];
320
321             // We can move BMP only character classes after fixed character terms.
322             if ((term.type == PatternTerm::TypeCharacterClass)
323                 && (term.quantityType == QuantifierFixedCount)
324                 && (!m_decodeSurrogatePairs || !term.characterClass->m_hasNonBMPCharacters)
325                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
326                 && (nextTerm.quantityType == QuantifierFixedCount)) {
327                 PatternTerm termCopy = term;
328                 alternative->m_terms[i] = nextTerm;
329                 alternative->m_terms[i + 1] = termCopy;
330             }
331         }
332     }
333
    // Emits a binary-search-style cascade of compares over the sorted 'ranges'
    // array, jumping to matchDest when 'character' falls inside a range or
    // equals one of the sorted singleton 'matches' interleaved between ranges,
    // and to 'failures' otherwise. *matchIndex tracks how many singletons have
    // been consumed so far and is advanced as they are emitted or subsumed by
    // a range.
    // NOTE(review): lo/hi are narrowed to 'char' — this assumes the ranges
    // passed here are the ASCII (m_ranges) tables, with the Unicode ranges
    // handled separately in matchCharacterClass(); confirm against callers.
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
    {
        do {
            // pick which range we're going to generate
            int which = count >> 1;
            char lo = ranges[which].begin;
            char hi = ranges[which].end;

            // check if there are any ranges or matches below lo.  If not, just jl to failure -
            // if there is anything else to check, check that first, if it falls through jmp to failure.
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                // generate code for all ranges before this one
                if (which)
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);

                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                    ++*matchIndex;
                }
                failures.append(jump());

                loOrAbove.link(this);
            } else if (which) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
                failures.append(jump());

                loOrAbove.link(this);
            } else
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

            // Singletons that fall inside [lo, hi] are covered by the range test.
            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
                ++*matchIndex;

            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
            // fall through to here, the value is above hi.

            // shuffle along & loop around if there are any more matches to handle.
            unsigned next = which + 1;
            ranges += next;
            count -= next;
        } while (count);
    }
380
    // Emits code that jumps to matchDest when 'character' is a member of
    // charClass, and falls through otherwise. Prefers the class's precomputed
    // lookup table when present (and surrogate pairs are not being decoded).
    // Otherwise values above 0x7f are tested against the Unicode
    // matches/ranges and ASCII values against the ASCII matches/ranges.
    // May clobber 'character' (the ignore-case OR-with-32 fold below).
    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    {
        if (charClass->m_table && !m_decodeSurrogatePairs) {
            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
            matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
            return;
        }
        JumpList unicodeFail;
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
            JumpList isAscii;
            if (charClass->m_matches.size() || charClass->m_ranges.size())
                isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f)));

            if (charClass->m_matchesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
                    UChar32 ch = charClass->m_matchesUnicode[i];
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
                }
            }

            if (charClass->m_rangesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
                    UChar32 lo = charClass->m_rangesUnicode[i].begin;
                    UChar32 hi = charClass->m_rangesUnicode[i].end;

                    // Match when lo <= character <= hi.
                    Jump below = branch32(LessThan, character, Imm32(lo));
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                    below.link(this);
                }
            }

            // A non-ASCII character that matched nothing above has failed; skip
            // past the ASCII tests (when there are any).
            if (charClass->m_matches.size() || charClass->m_ranges.size())
                unicodeFail = jump();
            isAscii.link(this);
        }

        if (charClass->m_ranges.size()) {
            unsigned matchIndex = 0;
            JumpList failures;
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
            // Emit any singleton matches above the last range.
            while (matchIndex < charClass->m_matches.size())
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));

            failures.link(this);
        } else if (charClass->m_matches.size()) {
            // optimization: gather 'a','A' etc back together, can mask & test once.
            Vector<char> matchesAZaz;

            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
                char ch = charClass->m_matches[i];
                if (m_pattern.ignoreCase()) {
                    if (isASCIILower(ch)) {
                        matchesAZaz.append(ch);
                        continue;
                    }
                    if (isASCIIUpper(ch))
                        continue;
                }
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
            }

            if (unsigned countAZaz = matchesAZaz.size()) {
                // Fold to lowercase once, then compare against the lowercase set.
                or32(TrustedImm32(32), character);
                for (unsigned i = 0; i < countAZaz; ++i)
                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
            }
        }

        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
            unicodeFail.link(this);
    }
452
    // Jumps if input not available; will have (incorrectly) incremented already!
    // That is: 'index' is advanced by countToCheck before the bounds check, and
    // is NOT rolled back on the failure path — the caller must account for it.
    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
    {
        if (countToCheck)
            add32(Imm32(countToCheck), index);
        return branch32(Above, index, length);
    }
460
    // Advances 'index' by countToCheck and jumps if it is still within the
    // input (index <= length). The increment persists on both paths.
    Jump jumpIfAvailableInput(unsigned countToCheck)
    {
        add32(Imm32(countToCheck), index);
        return branch32(BelowOrEqual, index, length);
    }
466
    // Jumps when 'index' is within the input bounds (index <= length).
    Jump checkInput()
    {
        return branch32(BelowOrEqual, index, length);
    }
471
    // Jumps when 'index' is exactly at the end of the input (index == length).
    Jump atEndOfInput()
    {
        return branch32(Equal, index, length);
    }
476
    // Jumps when 'index' is not at the end of the input (index != length).
    Jump notAtEndOfInput()
    {
        return branch32(NotEqual, index, length);
    }
481
482     BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
483     {
484         RegisterID base = input;
485
486         // BaseIndex() addressing can take a int32_t offset. Given that we can have a regular
487         // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
488         // for addressing in extreme cases where we might underflow. Therefore we check to see if
489         // negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
490         // If so, we do our own address calculating by adjusting the base, using the result register
491         // as a temp address register.
492         unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff;
493         unsigned offsetAdjustAmount = 0x40000000;
494         if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
495             base = tempReg;
496             move(input, base);
497             while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
498                 subPtr(TrustedImm32(offsetAdjustAmount), base);
499                 if (m_charSize != Char8)
500                     subPtr(TrustedImm32(offsetAdjustAmount), base);
501                 negativeCharacterOffset -= offsetAdjustAmount;
502             }
503         }
504
505         Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));
506
507         if (m_charSize == Char8)
508             return BaseIndex(input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());
509
510         return BaseIndex(input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
511     }
512
513 #ifdef JIT_UNICODE_EXPRESSIONS
    // Emits code that reads the UTF-16 code unit at regUnicodeInputAndTrail
    // into resultReg. If it is a leading surrogate and an in-bounds trailing
    // surrogate follows, the pair is combined into the full code point
    // (lead/trail tag bits stripped, lead shifted left 10, OR'd with trail,
    // plus the 0x10000 supplementary-planes base). Otherwise resultReg keeps
    // the single code unit. Clobbers regT2 and regUnicodeInputAndTrail.
    void tryReadUnicodeCharImpl(RegisterID resultReg)
    {
        ASSERT(m_charSize == Char16);

        JumpList notUnicode;
        load16Unaligned(regUnicodeInputAndTrail, resultReg);
        and32(surrogateTagMask, resultReg, regT2);
        notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
        addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
        // regT2 = one past the end of the input; bail if the trail unit would
        // be read out of bounds.
        getEffectiveAddress(BaseIndex(input, length, TimesTwo), regT2);
        notUnicode.append(branch32(AboveOrEqual, regUnicodeInputAndTrail, regT2));
        load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
        and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
        notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
        sub32(leadingSurrogateTag, resultReg);
        sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
        lshift32(TrustedImm32(10), resultReg);
        or32(regUnicodeInputAndTrail, resultReg);
        add32(supplementaryPlanesBase, resultReg);
        notUnicode.link(this);
    }
535
    // Emits a surrogate-pair-aware character read from 'address' into
    // resultReg. When resultReg is regT0, the read is emitted as a near call
    // to a single shared copy of tryReadUnicodeCharImpl (the call sites are
    // collected in m_tryReadUnicodeCharacterCalls and linked up later);
    // otherwise the implementation is inlined here.
    void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
    {
        ASSERT(m_charSize == Char16);

        getEffectiveAddress(address, regUnicodeInputAndTrail);

        if (resultReg == regT0)
            m_tryReadUnicodeCharacterCalls.append(nearCall());
        else
            tryReadUnicodeCharImpl(resultReg);
    }
547 #endif
548
    // Emits a read of the character at (indexReg - negativeCharacterOffset)
    // into resultReg: an 8-bit load for Char8 input, a surrogate-pair-aware
    // read when decoding unicode, otherwise an unaligned 16-bit load.
    // resultReg also serves as the temp register for address computation.
    void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
    {
        BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);

        if (m_charSize == Char8)
            load8(address, resultReg);
#ifdef JIT_UNICODE_EXPRESSIONS
        else if (m_decodeSurrogatePairs)
            tryReadUnicodeChar(address, resultReg);
#endif
        else
            load16Unaligned(address, resultReg);
    }
562
    // Emits a read of the character at (index - negativeCharacterOffset) into
    // 'character' and returns a jump taken when it does not equal ch. For
    // case-insensitive ASCII letters both sides are lower-cased with |0x20
    // before the compare (clobbering 'character').
    Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
    {
        readCharacter(negativeCharacterOffset, character);

        // For case-insesitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
        if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        return branch32(NotEqual, character, Imm32(ch));
    }
577     
    // Stores a register into the given stack-frame slot (word-sized poke).
    void storeToFrame(RegisterID reg, unsigned frameLocation)
    {
        poke(reg, frameLocation);
    }
582
    // Stores a 32-bit immediate into the given stack-frame slot.
    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }
587
#if CPU(ARM64) || CPU(X86_64)
    // Stores a pointer-sized immediate into the given stack-frame slot
    // (64-bit ports only).
    void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }
#endif
594
    // Stores a placeholder (null) pointer into the given stack-frame slot and
    // returns a label so the actual value can be patched in at link time.
    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
    {
        return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }
599
    // Loads the given stack-frame slot into a register (word-sized peek).
    void loadFromFrame(unsigned frameLocation, RegisterID reg)
    {
        peek(reg, frameLocation);
    }
604
    // Loads a code pointer from the given stack-frame slot and jumps to it.
    // The indirect jump carries the Yarr backtrack pointer tag for this code
    // block (pointer-profiling support).
    void loadFromFrameAndJump(unsigned frameLocation)
    {
        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)), ptrTag(YarrBacktrackPtrTag, &m_codeBlock));
    }
609
610     unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
611     {
612         if (!callFrameSize)
613             return 0;
614
615         callFrameSize *= sizeof(void*);
616         if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
617             CRASH();
618         callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
619         return callFrameSize;
620     }
    // Emits the prologue that reserves the matcher's stack frame. When the
    // zeroStackFrame option is on (64-bit ports only), the freshly reserved
    // area is zero-filled: a short unrolled store loop for frames up to 128
    // bytes, otherwise a two-words-per-iteration loop walking down from the
    // old stack pointer. Clobbers regT0 on the zeroing paths.
    void initCallFrame()
    {
        unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
        if (callFrameSizeInBytes) {
#if CPU(X86_64) || CPU(ARM64)
            if (Options::zeroStackFrame()) {
                // We need to start from the stack pointer, because we could have spilled callee saves
                move(stackPointerRegister, regT0);
                subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
                if (callFrameSizeInBytes <= 128) {
                    for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
                        storePtr(TrustedImm32(0), Address(regT0, -8 - offset));
                } else {
                    Label zeroLoop = label();
                    subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0);
#if CPU(ARM64)
                    storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
#else
                    storePtr(TrustedImm32(0), Address(regT0));
                    storePtr(TrustedImm32(0), Address(regT0, sizeof(intptr_t)));
#endif
                    branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
                }
            } else
#endif
                subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);

        }
    }
    // Emits the epilogue counterpart of initCallFrame(): releases the
    // matcher's stack frame, if one was reserved.
    void removeCallFrame()
    {
        unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
        if (callFrameSizeInBytes)
            addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
    }
656
    // Emits the "no match" return: WTF::notFound in returnRegister, zero in
    // returnRegister2, then the shared return sequence.
    void generateFailReturn()
    {
        move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
        move(TrustedImm32(0), returnRegister2);
        generateReturn();
    }
663
    // Emits the shared exit paths for abnormal termination, if any jumps were
    // recorded: targets of m_abortExecution return (size_t)-2 and targets of
    // m_hitMatchLimit return (size_t)-1 in returnRegister. Both paths tear
    // down the call frame, zero returnRegister2, and return. Emits nothing
    // when neither jump list has entries.
    void generateJITFailReturn()
    {
        if (m_abortExecution.empty() && m_hitMatchLimit.empty())
            return;

        JumpList finishExiting;
        if (!m_abortExecution.empty()) {
            m_abortExecution.link(this);
            move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister);
            finishExiting.append(jump());
        }

        if (!m_hitMatchLimit.empty()) {
            m_hitMatchLimit.link(this);
            move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister);
        }

        finishExiting.link(this);
        removeCallFrame();
        move(TrustedImm32(0), returnRegister2);
        generateReturn();
    }
686
    // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
    // Emits a store of the start offset of capture group 'subpattern'
    // (1-based) into output slot [subpattern * 2].
    void setSubpatternStart(RegisterID reg, unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
    }
    // Records 'reg' as the end offset of the given capture, i.e. slot
    // (subpattern * 2 + 1) of the output vector.
    void setSubpatternEnd(RegisterID reg, unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
    }
    // Marks the given capture as unmatched by writing -1 into its start slot.
    void clearSubpatternStart(unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
    }
706
707     void clearMatches(unsigned subpattern, unsigned lastSubpattern)
708     {
709         for (; subpattern <= lastSubpattern; subpattern++)
710             clearSubpatternStart(subpattern);
711     }
712
713     // We use one of three different strategies to track the start of the current match,
714     // while matching.
715     // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
716     //    at the end of matching. This is irrespective of compileMode, and in this case
717     //    these methods should never be called.
718     // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
719     //    vector, store the match start in the output vector.
720     // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
721     //    in this register.
    // Records the start of the current match (strategies 2 and 3 above;
    // never called for fixed-size patterns).
    void setMatchStart(RegisterID reg)
    {
        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        // IncludeSubpatterns: write into the output vector via 'output'.
        // MatchOnly: keep the value in the (otherwise unused) 'output' register.
        if (compileMode == IncludeSubpatterns)
            store32(reg, output);
        else
            move(reg, output);
    }
    // Loads the previously recorded match start into 'reg'; the inverse of
    // setMatchStart() above.
    void getMatchStart(RegisterID reg)
    {
        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        if (compileMode == IncludeSubpatterns)
            load32(output, reg);
        else
            move(output, reg);
    }
738
    // The opcodes the pattern is lowered to before code generation; each
    // YarrOp (see below) carries one of these.
    enum YarrOpCode {
        // These nodes wrap body alternatives - those in the main disjunction,
        // rather than subpatterns or assertions. These are chained together in
        // a doubly linked list, with a 'begin' node for the first alternative,
        // a 'next' node for each subsequent alternative, and an 'end' node at
        // the end. In the case of repeating alternatives, the 'end' node also
        // has a reference back to 'begin'.
        OpBodyAlternativeBegin,
        OpBodyAlternativeNext,
        OpBodyAlternativeEnd,
        // Similar to the body alternatives, but used for subpatterns with two
        // or more alternatives.
        OpNestedAlternativeBegin,
        OpNestedAlternativeNext,
        OpNestedAlternativeEnd,
        // Used for alternatives in subpatterns where there is only a single
        // alternative (backtracking is easier in these cases), or for alternatives
        // which never need to be backtracked (those in parenthetical assertions,
        // terminal subpatterns).
        OpSimpleNestedAlternativeBegin,
        OpSimpleNestedAlternativeNext,
        OpSimpleNestedAlternativeEnd,
        // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
        OpParenthesesSubpatternOnceBegin,
        OpParenthesesSubpatternOnceEnd,
        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
        OpParenthesesSubpatternTerminalBegin,
        OpParenthesesSubpatternTerminalEnd,
        // Used to wrap generic captured matches
        OpParenthesesSubpatternBegin,
        OpParenthesesSubpatternEnd,
        // Used to wrap parenthetical assertions.
        OpParentheticalAssertionBegin,
        OpParentheticalAssertionEnd,
        // Wraps all simple terms (pattern characters, character classes).
        OpTerm,
        // Where an expression contains only 'once through' body alternatives
        // and no repeating ones, this op is used to return match failure.
        OpMatchFailed
    };
779
780     // This structure is used to hold the compiled opcode information,
781     // including reference back to the original PatternTerm/PatternAlternatives,
782     // and JIT compilation data structures.
    struct YarrOp {
        // Constructs an OpTerm node wrapping a simple pattern term.
        explicit YarrOp(PatternTerm* term)
            : m_op(OpTerm)
            , m_term(term)
            , m_isDeadCode(false)
        {
        }

        // Constructs a structural node (alternative/parentheses/assertion
        // begin/next/end); m_term is not meaningful for these.
        explicit YarrOp(YarrOpCode op)
            : m_op(op)
            , m_isDeadCode(false)
        {
        }

        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
        YarrOpCode m_op;
        PatternTerm* m_term;

        // For alternatives, this holds the PatternAlternative and doubly linked
        // references to this alternative's siblings. In the case of the
        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
        // repeating alternative.
        PatternAlternative* m_alternative;
        size_t m_previousOp;
        size_t m_nextOp;

        // Used to record a set of Jumps out of the generated code, typically
        // used for jumps out to backtracking code, and a single reentry back
        // into the code for a node (likely where a backtrack will trigger
        // rematching).
        Label m_reentry;
        JumpList m_jumps;

        // Used for backtracking when the prior alternative did not consume any
        // characters but matched.
        Jump m_zeroLengthMatch;

        // This flag is used to null out the second pattern character, when
        // two are fused to match a pair together.
        bool m_isDeadCode;

        // Currently used in the case of some of the more complex management of
        // 'm_checkedOffset', to cache the offset used in this alternative, to avoid
        // recalculating it.
        Checked<unsigned> m_checkAdjust;

        // Used by OpNestedAlternativeNext/End to hold the pointer to the
        // value that will be pushed into the pattern's frame to return to,
        // upon backtracking back into the disjunction.
        DataLabelPtr m_returnAddress;
    };
835
836     // BacktrackingState
837     // This class encapsulates information about the state of code generation
838     // whilst generating the code for backtracking, when a term fails to match.
839     // Upon entry to code generation of the backtracking code for a given node,
840     // the Backtracking state will hold references to all control flow sources
841     // that are outputs in need of further backtracking from the prior node
842     // generated (which is the subsequent operation in the regular expression,
843     // and in the m_ops Vector, since we generated backtracking backwards).
844     // These references to control flow take the form of:
845     //  - A jump list of jumps, to be linked to code that will backtrack them
846     //    further.
847     //  - A set of DataLabelPtr values, to be populated with values to be
848     //    treated effectively as return addresses backtracking into complex
849     //    subpatterns.
850     //  - A flag indicating that the current sequence of generated code up to
851     //    this point requires backtracking.
    class BacktrackingState {
    public:
        BacktrackingState()
            : m_pendingFallthrough(false)
        {
        }

        // Add a jump or jumps, a return address, or set the flag indicating
        // that the current 'fallthrough' control flow requires backtracking.
        void append(const Jump& jump)
        {
            m_laterFailures.append(jump);
        }
        void append(JumpList& jumpList)
        {
            m_laterFailures.append(jumpList);
        }
        void append(const DataLabelPtr& returnAddress)
        {
            m_pendingReturns.append(returnAddress);
        }
        void fallthrough()
        {
            ASSERT(!m_pendingFallthrough);
            m_pendingFallthrough = true;
        }

        // These methods clear the backtracking state, either linking to the
        // current location, a provided label, or copying the backtracking out
        // to a JumpList. All actions may require code generation to take place,
        // and as such are passed a pointer to the assembler.

        // Link all pending jumps and return addresses to the assembler's
        // current location. A pending fallthrough already lands here, so it
        // is simply cleared.
        void link(MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
            }
            m_laterFailures.link(assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        // Link all pending state to 'label'. Unlike link(), a pending
        // fallthrough needs an explicit jump emitted to reach the label.
        void linkTo(Label label, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
                m_pendingReturns.clear();
            }
            if (m_pendingFallthrough)
                assembler->jump(label);
            m_laterFailures.linkTo(label, assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        // Instead of linking, transfer all pending backtracks into 'jumpList'
        // (pending returns are recorded against the current location, which
        // then jumps out via the list).
        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
                m_pendingFallthrough = true;
            }
            if (m_pendingFallthrough)
                jumpList.append(assembler->jump());
            jumpList.append(m_laterFailures);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }

        // True when there is nothing left to backtrack: no jumps, no return
        // addresses, no pending fallthrough.
        bool isEmpty()
        {
            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
        }

        // Called at the end of code generation to link all return addresses.
        // The patched-in pointers are tagged with YarrBacktrackPtrTag (keyed
        // on the code block) for pointer profiling.
        void linkDataLabels(LinkBuffer& linkBuffer, YarrCodeBlock& codeBlock)
        {
            ASSERT(isEmpty());
            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation, ptrTag(YarrBacktrackPtrTag, &codeBlock)));
        }

    private:
        // Pairs a DataLabelPtr (a pointer-sized slot in the generated code)
        // with the label whose address should be patched into it.
        struct ReturnAddressRecord {
            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
                : m_dataLabel(dataLabel)
                , m_backtrackLocation(backtrackLocation)
            {
            }

            DataLabelPtr m_dataLabel;
            Label m_backtrackLocation;
        };

        JumpList m_laterFailures;
        bool m_pendingFallthrough;
        Vector<DataLabelPtr, 4> m_pendingReturns;
        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
    };
954
955     // Generation methods:
956     // ===================
957
958     // This method provides a default implementation of backtracking common
959     // to many terms; terms commonly jump out of the forwards  matching path
960     // on any failed conditions, and add these jumps to the m_jumps list. If
961     // no special handling is required we can often just backtrack to m_jumps.
    void backtrackTermDefault(size_t opIndex)
    {
        // Nothing term-specific to unwind: just hand this op's failure jumps
        // to the shared backtracking state.
        YarrOp& op = m_ops[opIndex];
        m_backtrackingState.append(op.m_jumps);
    }
967
    // Matches a '^' assertion. In multiline mode it succeeds at the start of
    // input or when the preceding character is a newline; otherwise only at
    // the start of input.
    void generateAssertionBOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.multiline()) {
            const RegisterID character = regT0;

            JumpList matchDest;
            // At term position 0, index == checked offset means start of input.
            if (!term->inputPosition)
                matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));

            // Otherwise the assertion holds if the previous character is a newline.
            readCharacter(m_checkedOffset - term->inputPosition + 1, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            // Erk, really should poison out these alternatives early. :-/
            if (term->inputPosition)
                op.m_jumps.append(jump());
            else
                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
        }
    }
    void backtrackAssertionBOL(size_t opIndex)
    {
        // Assertions consume no input; default backtracking suffices.
        backtrackTermDefault(opIndex);
    }
997
    // Matches a '$' assertion. In multiline mode it succeeds at the end of
    // input or when the current character is a newline; otherwise only at
    // the end of input.
    void generateAssertionEOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.multiline()) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (term->inputPosition == m_checkedOffset.unsafeGet())
                matchDest.append(atEndOfInput());

            readCharacter(m_checkedOffset - term->inputPosition, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            if (term->inputPosition == m_checkedOffset.unsafeGet())
                op.m_jumps.append(notAtEndOfInput());
            // Erk, really should poison out these alternatives early. :-/
            else
                op.m_jumps.append(jump());
        }
    }
    void backtrackAssertionEOL(size_t opIndex)
    {
        // Assertions consume no input; default backtracking suffices.
        backtrackTermDefault(opIndex);
    }
1027
    // Helper for word-boundary assertions: classifies the character at the
    // term's position. Jumps to nextIsWordChar on a wordchar; jumps to
    // nextIsNotWordChar at end of input. Also falls through on nextIsNotWordChar.
    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        if (term->inputPosition == m_checkedOffset.unsafeGet())
            nextIsNotWordChar.append(atEndOfInput());

        readCharacter(m_checkedOffset - term->inputPosition, character);

        CharacterClass* wordcharCharacterClass;

        // Unicode ignore-case matching uses the wider ignore-case wordchar class.
        if (m_unicodeIgnoreCase)
            wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
        else
            wordcharCharacterClass = m_pattern.wordcharCharacterClass();

        matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass);
    }
1050
    // Matches a word-boundary assertion (\b, or \B when term->invert()).
    // First classifies the previous character (treating start-of-input as a
    // non-wordchar), then classifies the current character via
    // matchAssertionWordchar(); the assertion holds when the classifications
    // differ (or match, for the inverted form).
    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        Jump atBegin;
        JumpList matchDest;
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
        // Read the character *before* the term position.
        readCharacter(m_checkedOffset - term->inputPosition + 1, character);

        CharacterClass* wordcharCharacterClass;

        if (m_unicodeIgnoreCase)
            wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
        else
            wordcharCharacterClass = m_pattern.wordcharCharacterClass();

        matchCharacterClass(character, matchDest, wordcharCharacterClass);
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-though!
        }

        op.m_jumps.append(wordCharThenWordChar);

        // Success: a boundary (or non-boundary for \B) was found.
        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
    void backtrackAssertionWordBoundary(size_t opIndex)
    {
        // Assertions consume no input; default backtracking suffices.
        backtrackTermDefault(opIndex);
    }
1108
    // Matches a single pattern character (quantityMaxCount == 1, fixed count).
    // Where the following ops are also single fixed-count characters at
    // consecutive positions, they are fused into one 16/32-bit compare (the
    // fused ops are marked m_isDeadCode so they emit nothing themselves).
    void generatePatternCharacterOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];

        if (op.m_isDeadCode)
            return;
        
        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
        // node, so there must always be at least one more node.
        ASSERT(opIndex + 1 < m_ops.size());
        YarrOp* nextOp = &m_ops[opIndex + 1];

        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        if ((ch > 0xff) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            op.m_jumps.append(jump());
            return;
        }

        const RegisterID character = regT0;
        // How many characters fit in one 32-bit compare at this char size.
        unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
        unsigned ignoreCaseMask = 0;
#if CPU(BIG_ENDIAN)
        int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
        int allCharacters = ch;
#endif
        unsigned numberCharacters;
        unsigned startTermPosition = term->inputPosition;

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));

        // ASCII letters differ between cases only in bit 0x20, so case
        // folding is done by OR-ing that bit in before comparing.
        if (m_pattern.ignoreCase() && isASCIIAlpha(ch))
#if CPU(BIG_ENDIAN)
            ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
            ignoreCaseMask |= 32;
#endif

        // Try to fuse the following ops into this compare.
        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
            PatternTerm* nextTerm = nextOp->m_term;
            
            // Only fuse single fixed-count characters at consecutive positions
            // (and only BMP characters when decoding surrogate pairs).
            if (nextTerm->type != PatternTerm::TypePatternCharacter
                || nextTerm->quantityType != QuantifierFixedCount
                || nextTerm->quantityMaxCount != 1
                || nextTerm->inputPosition != (startTermPosition + numberCharacters)
                || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs))
                break;

            nextOp->m_isDeadCode = true;

#if CPU(BIG_ENDIAN)
            int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
#else
            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
#endif

            UChar32 currentCharacter = nextTerm->patternCharacter;

            if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
                // Have a 16 bit pattern character and an 8 bit string - short circuit
                op.m_jumps.append(jump());
                return;
            }

            // For case-insensitive compares, non-ascii characters that have different
            // upper & lower case representations are converted to a character class.
            ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));

            allCharacters |= (currentCharacter << shiftAmount);

            if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
                ignoreCaseMask |= 32 << shiftAmount;                    
        }

        // Emit the comparison(s) for the fused run of characters.
        if (m_charSize == Char8) {
            switch (numberCharacters) {
            case 1:
                op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - startTermPosition, character));
                return;
            case 2: {
                load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character);
                break;
            }
            case 3: {
                // Compare the first pair as 16 bits, then the third character alone.
                load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character);
                if (ignoreCaseMask)
                    or32(Imm32(ignoreCaseMask), character);
                op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
                op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, m_checkedOffset - startTermPosition - 2, character));
                return;
            }
            case 4: {
                load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- startTermPosition, character), character);
                break;
            }
            }
        } else {
            switch (numberCharacters) {
            case 1:
                op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
                return;
            case 2:
                load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- term->inputPosition, character), character);
                break;
            }
        }

        if (ignoreCaseMask)
            or32(Imm32(ignoreCaseMask), character);
        op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
        return;
    }
    void backtrackPatternCharacterOnce(size_t opIndex)
    {
        // A fixed single-character match holds no state to unwind.
        backtrackTermDefault(opIndex);
    }
1230
    // Matches quantityMaxCount consecutive copies of a single pattern
    // character ({n} fixed count). countRegister walks from
    // index - scaledMaxCount back up to index, comparing each position.
    // Non-BMP characters occupy two code units, hence the 2x scaling.
    void generatePatternCharacterFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
        scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
        sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);

        Label loop(this);
        readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
        if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
            // ASCII letters case-fold via bit 0x20.
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
            add32(TrustedImm32(2), countRegister);
        else
#endif
            add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    void backtrackPatternCharacterFixed(size_t opIndex)
    {
        // Fixed-count matches are all-or-nothing; no partial state to unwind.
        backtrackTermDefault(opIndex);
    }
1268
    // Greedily matches a quantified single character (e.g. a*, a+, a{,n}):
    // consumes as many occurrences as possible (up to quantityMaxCount) and
    // stores the match count into the term's frame slot for backtracking.
    void generatePatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList failures;
            Label loop(this);
            failures.append(atEndOfInput());
            failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));

            add32(TrustedImm32(1), index);
#ifdef JIT_UNICODE_EXPRESSIONS
            if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
                // A non-BMP character consumes two code units; back out if the
                // trailing surrogate would run past the end of input.
                Jump surrogatePairOk = notAtEndOfInput();
                sub32(TrustedImm32(1), index);
                failures.append(jump());
                surrogatePairOk.link(this);
                add32(TrustedImm32(1), index);
            }
#endif
            add32(TrustedImm32(1), countRegister);

            if (term->quantityMaxCount == quantifyInfinite)
                jump(loop);
            else
                branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);

            failures.link(this);
        }
        // Backtracking re-enters here to persist a reduced match count.
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
    }
    // Backtracks a greedy single-character match by giving back one
    // occurrence (one or two code units) and re-entering the match path;
    // fails onward when the match count is already zero.
    void backtrackPatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter))
            sub32(TrustedImm32(1), index);
        else
            sub32(TrustedImm32(2), index);
        jump(op.m_reentry);
    }
1328
    // Non-greedy quantified single character: initially matches zero
    // occurrences, just recording the count; consuming more happens in the
    // backtracking path (backtrackPatternCharacterNonGreedy).
    void generatePatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
    }
    // Backtracks a non-greedy single-character match by trying to consume one
    // more occurrence; on success re-enters via op.m_reentry. When no more
    // can be matched, rewinds index past all consumed occurrences and falls
    // through to further backtracking.
    void backtrackPatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList nonGreedyFailures;
            nonGreedyFailures.append(atEndOfInput());
            if (term->quantityMaxCount != quantifyInfinite)
                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
            nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));

            add32(TrustedImm32(1), index);
#ifdef JIT_UNICODE_EXPRESSIONS
            if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
                // A non-BMP character consumes two code units; back out if the
                // trailing surrogate would run past the end of input.
                Jump surrogatePairOk = notAtEndOfInput();
                sub32(TrustedImm32(1), index);
                nonGreedyFailures.append(jump());
                surrogatePairOk.link(this);
                add32(TrustedImm32(1), index);
            }
#endif
            add32(TrustedImm32(1), countRegister);

            jump(op.m_reentry);
            nonGreedyFailures.link(this);
        }

        if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
            // subtract countRegister*2 for non-BMP characters
            lshift32(TrustedImm32(1), countRegister);
        }

        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
1385
    // Matches a single character against a character class. When decoding
    // surrogate pairs, the starting index is saved to the frame so
    // backtracking can restore it, and index is advanced an extra code unit
    // for non-BMP matches.
    void generateCharacterClassOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        if (m_decodeSurrogatePairs)
            storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());

        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition, character);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                op.m_jumps.append(matchDest);
            else {
                op.m_jumps.append(jump());
                matchDest.link(this);
            }
        }
#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            // Characters at or above the supplementary planes took two code units.
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
        }
#endif
    }
    // Backtracks a single character-class match. When decoding surrogate
    // pairs, restores the index saved by generateCharacterClassOnce() (the
    // match may have consumed one or two code units), then backtracks as usual.
    void backtrackCharacterClassOnce(size_t opIndex)
    {
#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            YarrOp& op = m_ops[opIndex];
            PatternTerm* term = op.m_term;

            m_backtrackingState.link(this);
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
            m_backtrackingState.fallthrough();
        }
#endif
        backtrackTermDefault(opIndex);
    }
1432
    // Emits code to match a character class term with a fixed quantity count
    // greater than 1. countRegister walks from (index - quantityMaxCount) up
    // towards index, matching one character per iteration; failure jumps are
    // collected on op.m_jumps.
    void generateCharacterClassFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        // Start the counter quantityMaxCount positions behind index; the loop
        // terminates when the counter catches up with index.
        move(index, countRegister);
        sub32(Imm32(term->quantityMaxCount.unsafeGet()), countRegister);

        Label loop(this);
        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition - term->quantityMaxCount, character, countRegister);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                op.m_jumps.append(matchDest);
            else {
                op.m_jumps.append(jump());
                matchDest.link(this);
            }
        }

        add32(TrustedImm32(1), countRegister);
#ifdef JIT_UNICODE_EXPRESSIONS
        // A non-BMP character occupies two code units: step both the counter
        // and index past the trailing surrogate, failing at end of input.
        if (m_decodeSurrogatePairs) {
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            op.m_jumps.append(atEndOfInput());
            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
        }
#endif
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // Fixed-count character class matches generate no backtracking code of
    // their own; use the default term backtracking.
    void backtrackCharacterClassFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1476
    // Emits code to greedily match a character class as many times as possible,
    // up to quantityMaxCount (or unbounded for quantifyInfinite). The number of
    // successful matches is recorded on the frame for the backtracking pass.
    void generateCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        // In unicode mode, save the starting index so backtracking can rematch
        // from the beginning (matches have variable code-unit width).
        if (m_decodeSurrogatePairs)
            storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
        move(TrustedImm32(0), countRegister);

        JumpList failures;
        Label loop(this);
        failures.append(atEndOfInput());

        if (term->invert()) {
            // For an inverted class, a character-class hit is a failure.
            readCharacter(m_checkedOffset - term->inputPosition, character);
            matchCharacterClass(character, failures, term->characterClass);
        } else {
            JumpList matchDest;
            readCharacter(m_checkedOffset - term->inputPosition, character);
            // If we are matching the "any character" builtin class we only need to read the
            // character and don't need to match as it will always succeed.
            if (!term->characterClass->m_anyCharacter) {
                matchCharacterClass(character, matchDest, term->characterClass);
                failures.append(jump());
            }
            matchDest.link(this);
        }

        add32(TrustedImm32(1), index);
#ifdef JIT_UNICODE_EXPRESSIONS
        // Consume the trailing surrogate of a non-BMP character.
        if (m_decodeSurrogatePairs) {
            failures.append(atEndOfInput());
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
        }
#endif
        add32(TrustedImm32(1), countRegister);

        // Bounded quantifiers loop until the max count is reached; unbounded
        // ones loop until a failure jump fires.
        if (term->quantityMaxCount != quantifyInfinite) {
            branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
            failures.append(jump());
        } else
            jump(loop);

        failures.link(this);
        op.m_reentry = label();

        // Record how many characters matched, for the backtracking pass.
        storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
    }
    // Backtracking for a greedily matched character class: give up one match
    // and reenter at op.m_reentry. If no matches remain, fall through to the
    // enclosing backtracking state. In unicode mode, index cannot simply be
    // decremented (matches have variable code-unit width), so the term is
    // rematched from its saved start index to recompute the new position.
    void backtrackCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
        // Nothing left to give up: keep backtracking.
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        if (!m_decodeSurrogatePairs)
            sub32(TrustedImm32(1), index);
        else {
            const RegisterID character = regT0;

            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
            // Rematch one less
            storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());

            Label rematchLoop(this);
            readCharacter(m_checkedOffset - term->inputPosition, character);

            sub32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);

#ifdef JIT_UNICODE_EXPRESSIONS
            // Skip the trailing surrogate of a non-BMP character.
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
#endif

            // NOTE(review): this branch loops back only while countRegister is
            // zero, and the loop body always executes at least once even when
            // zero characters should be rematched — the condition and/or a
            // zero-count guard look suspect; confirm against upstream Yarr
            // unicode backtracking fixes before relying on this path.
            branchTest32(Zero, countRegister).linkTo(rematchLoop, this);

            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
        }
        jump(op.m_reentry);
    }
1569
    // Emits setup for a non-greedy character class: record zero matches and
    // fall through. Actual matching happens lazily on the backtrack path
    // (backtrackCharacterClassNonGreedy), which jumps back to op.m_reentry
    // after consuming one more character.
    void generateCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        // In unicode mode, remember where matching starts so backtracking can
        // restore index (matches have variable code-unit width).
        if (m_decodeSurrogatePairs)
            storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
        storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
    }
1583
    // Backtracking for a non-greedy character class: try to consume one more
    // character and reenter forward matching. Fails (falls through to the
    // enclosing backtracking state) at end of input, when quantityMaxCount has
    // been reached, or when the next character does not match the class.
    void backtrackCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        JumpList nonGreedyFailures;

        m_backtrackingState.link(this);

        if (m_decodeSurrogatePairs)
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
        loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);

        nonGreedyFailures.append(atEndOfInput());
        nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));

        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition, character);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                nonGreedyFailures.append(matchDest);
            else {
                nonGreedyFailures.append(jump());
                matchDest.link(this);
            }
        }

        add32(TrustedImm32(1), index);
#ifdef JIT_UNICODE_EXPRESSIONS
        // Consume the trailing surrogate of a non-BMP character.
        if (m_decodeSurrogatePairs) {
            nonGreedyFailures.append(atEndOfInput());
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
        }
#endif
        add32(TrustedImm32(1), countRegister);

        // One more character consumed: resume forward matching.
        jump(op.m_reentry);

        nonGreedyFailures.link(this);
        // NOTE(review): countRegister holds a match count, not a code-unit
        // count; in unicode mode non-BMP matches span two code units, so this
        // subtraction may not restore the pre-term position — confirm against
        // upstream Yarr fixes.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
1635
    // Emits code for a dot-star enclosure (/.*expr.*/ style patterns): scans
    // backwards from the current match start to the preceding newline (or the
    // initial start position) to find the true match start, then forwards from
    // index to the following newline (or end of input) to find the match end.
    // With the dotAll flag, '.' also matches newlines so the whole input is
    // the match.
    void generateDotStarEnclosure(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID matchPos = regT1;
#ifndef HAVE_INITIAL_START_REG
        // Without a dedicated register, the initial start value lives on the
        // frame and is loaded into the character register when needed.
        const RegisterID initialStart = character;
#endif

        JumpList foundBeginningNewLine;
        JumpList saveStartIndex;
        JumpList foundEndingNewLine;

        if (m_pattern.dotAll()) {
            move(TrustedImm32(0), matchPos);
            setMatchStart(matchPos);
            move(length, index);
            return;
        }

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        getMatchStart(matchPos);

#ifndef HAVE_INITIAL_START_REG
        loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
#endif
        saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
        // Scan backwards looking for a newline, stopping at the initial start.
        Label findBOLLoop(this);
        sub32(TrustedImm32(1), matchPos);
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());

#ifndef HAVE_INITIAL_START_REG
        // initialStart aliases character, which the load above clobbered; reload.
        loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
#endif
        branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
        saveStartIndex.append(jump());

        foundBeginningNewLine.link(this);
        add32(TrustedImm32(1), matchPos); // Advance past newline
        saveStartIndex.link(this);

        // A non-multiline pattern anchored with '^' can only match from the
        // very start of the input.
        if (!m_pattern.multiline() && term->anchors.bolAnchor)
            op.m_jumps.append(branchTest32(NonZero, matchPos));

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        setMatchStart(matchPos);

        move(index, matchPos);

        // Scan forwards for a newline (or the end of input) to find the match end.
        Label findEOLLoop(this);
        foundEndingNewLine.append(branch32(Equal, matchPos, length));
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
        add32(TrustedImm32(1), matchPos);
        jump(findEOLLoop);

        foundEndingNewLine.link(this);

        // A non-multiline pattern anchored with '$' can only match up to the
        // very end of the input.
        if (!m_pattern.multiline() && term->anchors.eolAnchor)
            op.m_jumps.append(branch32(NotEqual, matchPos, length));

        move(matchPos, index);
    }
1708
    // Dot-star enclosures generate no backtracking code of their own; use the
    // default term backtracking.
    void backtrackDotStarEnclosure(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1713     
1714     // Code generation/backtracking for simple terms
1715     // (pattern characters, character classes, and assertions).
1716     // These methods farm out work to the set of functions above.
1717     void generateTerm(size_t opIndex)
1718     {
1719         YarrOp& op = m_ops[opIndex];
1720         PatternTerm* term = op.m_term;
1721
1722         switch (term->type) {
1723         case PatternTerm::TypePatternCharacter:
1724             switch (term->quantityType) {
1725             case QuantifierFixedCount:
1726                 if (term->quantityMaxCount == 1)
1727                     generatePatternCharacterOnce(opIndex);
1728                 else
1729                     generatePatternCharacterFixed(opIndex);
1730                 break;
1731             case QuantifierGreedy:
1732                 generatePatternCharacterGreedy(opIndex);
1733                 break;
1734             case QuantifierNonGreedy:
1735                 generatePatternCharacterNonGreedy(opIndex);
1736                 break;
1737             }
1738             break;
1739
1740         case PatternTerm::TypeCharacterClass:
1741             switch (term->quantityType) {
1742             case QuantifierFixedCount:
1743                 if (term->quantityMaxCount == 1)
1744                     generateCharacterClassOnce(opIndex);
1745                 else
1746                     generateCharacterClassFixed(opIndex);
1747                 break;
1748             case QuantifierGreedy:
1749                 generateCharacterClassGreedy(opIndex);
1750                 break;
1751             case QuantifierNonGreedy:
1752                 generateCharacterClassNonGreedy(opIndex);
1753                 break;
1754             }
1755             break;
1756
1757         case PatternTerm::TypeAssertionBOL:
1758             generateAssertionBOL(opIndex);
1759             break;
1760
1761         case PatternTerm::TypeAssertionEOL:
1762             generateAssertionEOL(opIndex);
1763             break;
1764
1765         case PatternTerm::TypeAssertionWordBoundary:
1766             generateAssertionWordBoundary(opIndex);
1767             break;
1768
1769         case PatternTerm::TypeForwardReference:
1770             break;
1771
1772         case PatternTerm::TypeParenthesesSubpattern:
1773         case PatternTerm::TypeParentheticalAssertion:
1774             RELEASE_ASSERT_NOT_REACHED();
1775         case PatternTerm::TypeBackReference:
1776             m_failureReason = JITFailureReason::BackReference;
1777             break;
1778         case PatternTerm::TypeDotStarEnclosure:
1779             generateDotStarEnclosure(opIndex);
1780             break;
1781         }
1782     }
1783     void backtrackTerm(size_t opIndex)
1784     {
1785         YarrOp& op = m_ops[opIndex];
1786         PatternTerm* term = op.m_term;
1787
1788         switch (term->type) {
1789         case PatternTerm::TypePatternCharacter:
1790             switch (term->quantityType) {
1791             case QuantifierFixedCount:
1792                 if (term->quantityMaxCount == 1)
1793                     backtrackPatternCharacterOnce(opIndex);
1794                 else
1795                     backtrackPatternCharacterFixed(opIndex);
1796                 break;
1797             case QuantifierGreedy:
1798                 backtrackPatternCharacterGreedy(opIndex);
1799                 break;
1800             case QuantifierNonGreedy:
1801                 backtrackPatternCharacterNonGreedy(opIndex);
1802                 break;
1803             }
1804             break;
1805
1806         case PatternTerm::TypeCharacterClass:
1807             switch (term->quantityType) {
1808             case QuantifierFixedCount:
1809                 if (term->quantityMaxCount == 1)
1810                     backtrackCharacterClassOnce(opIndex);
1811                 else
1812                     backtrackCharacterClassFixed(opIndex);
1813                 break;
1814             case QuantifierGreedy:
1815                 backtrackCharacterClassGreedy(opIndex);
1816                 break;
1817             case QuantifierNonGreedy:
1818                 backtrackCharacterClassNonGreedy(opIndex);
1819                 break;
1820             }
1821             break;
1822
1823         case PatternTerm::TypeAssertionBOL:
1824             backtrackAssertionBOL(opIndex);
1825             break;
1826
1827         case PatternTerm::TypeAssertionEOL:
1828             backtrackAssertionEOL(opIndex);
1829             break;
1830
1831         case PatternTerm::TypeAssertionWordBoundary:
1832             backtrackAssertionWordBoundary(opIndex);
1833             break;
1834
1835         case PatternTerm::TypeForwardReference:
1836             break;
1837
1838         case PatternTerm::TypeParenthesesSubpattern:
1839         case PatternTerm::TypeParentheticalAssertion:
1840             RELEASE_ASSERT_NOT_REACHED();
1841
1842         case PatternTerm::TypeDotStarEnclosure:
1843             backtrackDotStarEnclosure(opIndex);
1844             break;
1845
1846         case PatternTerm::TypeBackReference:
1847             m_failureReason = JITFailureReason::BackReference;
1848             break;
1849         }
1850     }
1851
1852     void generate()
1853     {
1854         // Forwards generate the matching code.
1855         ASSERT(m_ops.size());
1856         size_t opIndex = 0;
1857
1858         do {
1859             YarrOp& op = m_ops[opIndex];
1860             switch (op.m_op) {
1861
1862             case OpTerm:
1863                 generateTerm(opIndex);
1864                 break;
1865
1866             // OpBodyAlternativeBegin/Next/End
1867             //
1868             // These nodes wrap the set of alternatives in the body of the regular expression.
1869             // There may be either one or two chains of OpBodyAlternative nodes, one representing
1870             // the 'once through' sequence of alternatives (if any exist), and one representing
1871             // the repeating alternatives (again, if any exist).
1872             //
1873             // Upon normal entry to the Begin alternative, we will check that input is available.
1874             // Reentry to the Begin alternative will take place after the check has taken place,
1875             // and will assume that the input position has already been progressed as appropriate.
1876             //
1877             // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1878             // successfully completed a match - return a success state from JIT code.
1879             //
1880             // Next alternatives allow for reentry optimized to suit backtracking from its
1881             // preceding alternative. It expects the input position to still be set to a position
1882             // appropriate to its predecessor, and it will only perform an input check if the
1883             // predecessor had a minimum size less than its own.
1884             //
1885             // In the case 'once through' expressions, the End node will also have a reentry
1886             // point to jump to when the last alternative fails. Again, this expects the input
1887             // position to still reflect that expected by the prior alternative.
1888             case OpBodyAlternativeBegin: {
1889                 PatternAlternative* alternative = op.m_alternative;
1890
1891                 // Upon entry at the head of the set of alternatives, check if input is available
1892                 // to run the first alternative. (This progresses the input position).
1893                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
1894                 // We will reenter after the check, and assume the input position to have been
1895                 // set as appropriate to this alternative.
1896                 op.m_reentry = label();
1897
1898                 m_checkedOffset += alternative->m_minimumSize;
1899                 break;
1900             }
1901             case OpBodyAlternativeNext:
1902             case OpBodyAlternativeEnd: {
1903                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1904                 PatternAlternative* alternative = op.m_alternative;
1905
1906                 // If we get here, the prior alternative matched - return success.
1907                 
1908                 // Adjust the stack pointer to remove the pattern's frame.
1909                 removeCallFrame();
1910
1911                 // Load appropriate values into the return register and the first output
1912                 // slot, and return. In the case of pattern with a fixed size, we will
1913                 // not have yet set the value in the first 
1914                 ASSERT(index != returnRegister);
1915                 if (m_pattern.m_body->m_hasFixedSize) {
1916                     move(index, returnRegister);
1917                     if (priorAlternative->m_minimumSize)
1918                         sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
1919                     if (compileMode == IncludeSubpatterns)
1920                         store32(returnRegister, output);
1921                 } else
1922                     getMatchStart(returnRegister);
1923                 if (compileMode == IncludeSubpatterns)
1924                     store32(index, Address(output, 4));
1925                 move(index, returnRegister2);
1926
1927                 generateReturn();
1928
1929                 // This is the divide between the tail of the prior alternative, above, and
1930                 // the head of the subsequent alternative, below.
1931
1932                 if (op.m_op == OpBodyAlternativeNext) {
1933                     // This is the reentry point for the Next alternative. We expect any code
1934                     // that jumps here to do so with the input position matching that of the
1935                     // PRIOR alteranative, and we will only check input availability if we
1936                     // need to progress it forwards.
1937                     op.m_reentry = label();
1938                     if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
1939                         add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
1940                         op.m_jumps.append(jumpIfNoAvailableInput());
1941                     } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
1942                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
1943                 } else if (op.m_nextOp == notFound) {
1944                     // This is the reentry point for the End of 'once through' alternatives,
1945                     // jumped to when the last alternative fails to match.
1946                     op.m_reentry = label();
1947                     sub32(Imm32(priorAlternative->m_minimumSize), index);
1948                 }
1949
1950                 if (op.m_op == OpBodyAlternativeNext)
1951                     m_checkedOffset += alternative->m_minimumSize;
1952                 m_checkedOffset -= priorAlternative->m_minimumSize;
1953                 break;
1954             }
1955
1956             // OpSimpleNestedAlternativeBegin/Next/End
1957             // OpNestedAlternativeBegin/Next/End
1958             //
1959             // These nodes are used to handle sets of alternatives that are nested within
1960             // subpatterns and parenthetical assertions. The 'simple' forms are used where
1961             // we do not need to be able to backtrack back into any alternative other than
1962             // the last, the normal forms allow backtracking into any alternative.
1963             //
1964             // Each Begin/Next node is responsible for planting an input check to ensure
1965             // sufficient input is available on entry. Next nodes additionally need to
1966             // jump to the end - Next nodes use the End node's m_jumps list to hold this
1967             // set of jumps.
1968             //
1969             // In the non-simple forms, successful alternative matches must store a
1970             // 'return address' using a DataLabelPtr, used to store the address to jump
1971             // to when backtracking, to get to the code for the appropriate alternative.
1972             case OpSimpleNestedAlternativeBegin:
1973             case OpNestedAlternativeBegin: {
1974                 PatternTerm* term = op.m_term;
1975                 PatternAlternative* alternative = op.m_alternative;
1976                 PatternDisjunction* disjunction = term->parentheses.disjunction;
1977
1978                 // Calculate how much input we need to check for, and if non-zero check.
1979                 op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
1980                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1981                     op.m_checkAdjust -= disjunction->m_minimumSize;
1982                 if (op.m_checkAdjust)
1983                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
1984
1985                 m_checkedOffset += op.m_checkAdjust;
1986                 break;
1987             }
1988             case OpSimpleNestedAlternativeNext:
1989             case OpNestedAlternativeNext: {
1990                 PatternTerm* term = op.m_term;
1991                 PatternAlternative* alternative = op.m_alternative;
1992                 PatternDisjunction* disjunction = term->parentheses.disjunction;
1993
1994                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1995                 if (op.m_op == OpNestedAlternativeNext) {
1996                     unsigned parenthesesFrameLocation = term->frameLocation;
1997                     op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
1998                 }
1999
2000                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2001                     // If the previous alternative matched without consuming characters then
2002                     // backtrack to try to match while consumming some input.
2003                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2004                 }
2005
2006                 // If we reach here then the last alternative has matched - jump to the
2007                 // End node, to skip over any further alternatives.
2008                 //
2009                 // FIXME: this is logically O(N^2) (though N can be expected to be very
2010                 // small). We could avoid this either by adding an extra jump to the JIT
2011                 // data structures, or by making backtracking code that jumps to Next
2012                 // alternatives are responsible for checking that input is available (if
2013                 // we didn't need to plant the input checks, then m_jumps would be free).
2014                 YarrOp* endOp = &m_ops[op.m_nextOp];
2015                 while (endOp->m_nextOp != notFound) {
2016                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2017                     endOp = &m_ops[endOp->m_nextOp];
2018                 }
2019                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2020                 endOp->m_jumps.append(jump());
2021
2022                 // This is the entry point for the next alternative.
2023                 op.m_reentry = label();
2024
2025                 // Calculate how much input we need to check for, and if non-zero check.
2026                 op.m_checkAdjust = alternative->m_minimumSize;
2027                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2028                     op.m_checkAdjust -= disjunction->m_minimumSize;
2029                 if (op.m_checkAdjust)
2030                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2031
2032                 YarrOp& lastOp = m_ops[op.m_previousOp];
2033                 m_checkedOffset -= lastOp.m_checkAdjust;
2034                 m_checkedOffset += op.m_checkAdjust;
2035                 break;
2036             }
2037             case OpSimpleNestedAlternativeEnd:
2038             case OpNestedAlternativeEnd: {
2039                 PatternTerm* term = op.m_term;
2040
2041                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2042                 if (op.m_op == OpNestedAlternativeEnd) {
2043                     unsigned parenthesesFrameLocation = term->frameLocation;
2044                     op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2045                 }
2046
2047                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2048                     // If the previous alternative matched without consuming characters then
2049                     // backtrack to try to match while consumming some input.
2050                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2051                 }
2052
2053                 // If this set of alternatives contains more than one alternative,
2054                 // then the Next nodes will have planted jumps to the End, and added
2055                 // them to this node's m_jumps list.
2056                 op.m_jumps.link(this);
2057                 op.m_jumps.clear();
2058
2059                 YarrOp& lastOp = m_ops[op.m_previousOp];
2060                 m_checkedOffset -= lastOp.m_checkAdjust;
2061                 break;
2062             }
2063
2064             // OpParenthesesSubpatternOnceBegin/End
2065             //
2066             // These nodes support (optionally) capturing subpatterns, that have a
2067             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 
2068             case OpParenthesesSubpatternOnceBegin: {
2069                 PatternTerm* term = op.m_term;
2070                 unsigned parenthesesFrameLocation = term->frameLocation;
2071                 const RegisterID indexTemporary = regT0;
2072                 ASSERT(term->quantityMaxCount == 1);
2073
2074                 // Upon entry to a Greedy quantified set of parenthese store the index.
2075                 // We'll use this for two purposes:
2076                 //  - To indicate which iteration we are on of mathing the remainder of
2077                 //    the expression after the parentheses - the first, including the
2078                 //    match within the parentheses, or the second having skipped over them.
2079                 //  - To check for empty matches, which must be rejected.
2080                 //
2081                 // At the head of a NonGreedy set of parentheses we'll immediately set the
2082                 // value on the stack to -1 (indicating a match skipping the subpattern),
2083                 // and plant a jump to the end. We'll also plant a label to backtrack to
2084                 // to reenter the subpattern later, with a store to set up index on the
2085                 // second iteration.
2086                 //
2087                 // FIXME: for capturing parens, could use the index in the capture array?
2088                 if (term->quantityType == QuantifierGreedy)
2089                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2090                 else if (term->quantityType == QuantifierNonGreedy) {
2091                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2092                     op.m_jumps.append(jump());
2093                     op.m_reentry = label();
2094                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2095                 }
2096
2097                 // If the parentheses are capturing, store the starting index value to the
2098                 // captures array, offsetting as necessary.
2099                 //
2100                 // FIXME: could avoid offsetting this value in JIT code, apply
2101                 // offsets only afterwards, at the point the results array is
2102                 // being accessed.
2103                 if (term->capture() && compileMode == IncludeSubpatterns) {
2104                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2105                     if (term->quantityType == QuantifierFixedCount)
2106                         inputOffset += term->parentheses.disjunction->m_minimumSize;
2107                     if (inputOffset) {
2108                         move(index, indexTemporary);
2109                         sub32(Imm32(inputOffset), indexTemporary);
2110                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2111                     } else
2112                         setSubpatternStart(index, term->parentheses.subpatternId);
2113                 }
2114                 break;
2115             }
2116             case OpParenthesesSubpatternOnceEnd: {
2117                 PatternTerm* term = op.m_term;
2118                 const RegisterID indexTemporary = regT0;
2119                 ASSERT(term->quantityMaxCount == 1);
2120
2121                 // Runtime ASSERT to make sure that the nested alternative handled the
2122                 // "no input consumed" check.
2123                 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2124                     Jump pastBreakpoint;
2125                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2126                     abortWithReason(YARRNoInputConsumed);
2127                     pastBreakpoint.link(this);
2128                 }
2129
2130                 // If the parentheses are capturing, store the ending index value to the
2131                 // captures array, offsetting as necessary.
2132                 //
2133                 // FIXME: could avoid offsetting this value in JIT code, apply
2134                 // offsets only afterwards, at the point the results array is
2135                 // being accessed.
2136                 if (term->capture() && compileMode == IncludeSubpatterns) {
2137                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2138                     if (inputOffset) {
2139                         move(index, indexTemporary);
2140                         sub32(Imm32(inputOffset), indexTemporary);
2141                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2142                     } else
2143                         setSubpatternEnd(index, term->parentheses.subpatternId);
2144                 }
2145
2146                 // If the parentheses are quantified Greedy then add a label to jump back
2147                 // to if get a failed match from after the parentheses. For NonGreedy
2148                 // parentheses, link the jump from before the subpattern to here.
2149                 if (term->quantityType == QuantifierGreedy)
2150                     op.m_reentry = label();
2151                 else if (term->quantityType == QuantifierNonGreedy) {
2152                     YarrOp& beginOp = m_ops[op.m_previousOp];
2153                     beginOp.m_jumps.link(this);
2154                 }
2155                 break;
2156             }
2157
2158             // OpParenthesesSubpatternTerminalBegin/End
2159             case OpParenthesesSubpatternTerminalBegin: {
2160                 PatternTerm* term = op.m_term;
2161                 ASSERT(term->quantityType == QuantifierGreedy);
2162                 ASSERT(term->quantityMaxCount == quantifyInfinite);
2163                 ASSERT(!term->capture());
2164
2165                 // Upon entry set a label to loop back to.
2166                 op.m_reentry = label();
2167
2168                 // Store the start index of the current match; we need to reject zero
2169                 // length matches.
2170                 storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2171                 break;
2172             }
2173             case OpParenthesesSubpatternTerminalEnd: {
2174                 YarrOp& beginOp = m_ops[op.m_previousOp];
2175                 if (!ASSERT_DISABLED) {
2176                     PatternTerm* term = op.m_term;
2177                     
2178                     // Runtime ASSERT to make sure that the nested alternative handled the
2179                     // "no input consumed" check.
2180                     Jump pastBreakpoint;
2181                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2182                     abortWithReason(YARRNoInputConsumed);
2183                     pastBreakpoint.link(this);
2184                 }
2185
2186                 // We know that the match is non-zero, we can accept it and
2187                 // loop back up to the head of the subpattern.
2188                 jump(beginOp.m_reentry);
2189
2190                 // This is the entry point to jump to when we stop matching - we will
2191                 // do so once the subpattern cannot match any more.
2192                 op.m_reentry = label();
2193                 break;
2194             }
2195
2196             // OpParenthesesSubpatternBegin/End
2197             //
2198             // These nodes support generic subpatterns.
2199             case OpParenthesesSubpatternBegin: {
2200 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2201                 PatternTerm* term = op.m_term;
2202                 unsigned parenthesesFrameLocation = term->frameLocation;
2203
2204                 // Upon entry to a Greedy quantified set of parentheses store the index.
2205                 // We'll use this for two purposes:
2206                 //  - To indicate which iteration we are on of matching the remainder of
2207                 //    the expression after the parentheses - the first, including the
2208                 //    match within the parentheses, or the second having skipped over them.
2209                 //  - To check for empty matches, which must be rejected.
2210                 //
2211                 // At the head of a NonGreedy set of parentheses we'll immediately set the
2212                 // value on the stack to -1 (indicating a match skipping the subpattern),
2213                 // and plant a jump to the end. We'll also plant a label to backtrack to
2214                 // to reenter the subpattern later, with a store to set up index on the
2215                 // second iteration.
2216                 //
2217                 // FIXME: for capturing parens, could use the index in the capture array?
2218                 if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
2219                     storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2220                     storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2221
2222                     if (term->quantityType == QuantifierNonGreedy) {
2223                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2224                         op.m_jumps.append(jump());
2225                     }
2226                     
2227                     op.m_reentry = label();
2228                     RegisterID currParenContextReg = regT0;
2229                     RegisterID newParenContextReg = regT1;
2230
2231                     loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
2232                     allocateParenContext(newParenContextReg);
2233                     storePtr(currParenContextReg, newParenContextReg);
2234                     storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2235                     saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
2236                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2237                 }
2238
2239                 // If the parentheses are capturing, store the starting index value to the
2240                 // captures array, offsetting as necessary.
2241                 //
2242                 // FIXME: could avoid offsetting this value in JIT code, apply
2243                 // offsets only afterwards, at the point the results array is
2244                 // being accessed.
2245                 if (term->capture() && compileMode == IncludeSubpatterns) {
2246                     const RegisterID indexTemporary = regT0;
2247                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2248                     if (term->quantityType == QuantifierFixedCount)
2249                         inputOffset += term->parentheses.disjunction->m_minimumSize;
2250                     if (inputOffset) {
2251                         move(index, indexTemporary);
2252                         sub32(Imm32(inputOffset), indexTemporary);
2253                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2254                     } else
2255                         setSubpatternStart(index, term->parentheses.subpatternId);
2256                 }
2257 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2258                 RELEASE_ASSERT_NOT_REACHED();
2259 #endif
2260                 break;
2261             }
2262             case OpParenthesesSubpatternEnd: {
2263 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2264                 PatternTerm* term = op.m_term;
2265                 unsigned parenthesesFrameLocation = term->frameLocation;
2266
2267                 // Runtime ASSERT to make sure that the nested alternative handled the
2268                 // "no input consumed" check.
2269                 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2270                     Jump pastBreakpoint;
2271                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
2272                     abortWithReason(YARRNoInputConsumed);
2273                     pastBreakpoint.link(this);
2274                 }
2275
2276                 const RegisterID countTemporary = regT1;
2277
2278                 YarrOp& beginOp = m_ops[op.m_previousOp];
2279                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2280                 add32(TrustedImm32(1), countTemporary);
2281                 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2282
2283                 // If the parentheses are capturing, store the ending index value to the
2284                 // captures array, offsetting as necessary.
2285                 //
2286                 // FIXME: could avoid offsetting this value in JIT code, apply
2287                 // offsets only afterwards, at the point the results array is
2288                 // being accessed.
2289                 if (term->capture() && compileMode == IncludeSubpatterns) {
2290                     const RegisterID indexTemporary = regT0;
2291                     
2292                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2293                     if (inputOffset) {
2294                         move(index, indexTemporary);
2295                         sub32(Imm32(inputOffset), indexTemporary);
2296                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2297                     } else
2298                         setSubpatternEnd(index, term->parentheses.subpatternId);
2299                 }
2300
2301                 // If the parentheses are quantified Greedy then add a label to jump back
2302                 // to if get a failed match from after the parentheses. For NonGreedy
2303                 // parentheses, link the jump from before the subpattern to here.
2304                 if (term->quantityType == QuantifierGreedy) {
2305                     if (term->quantityMaxCount != quantifyInfinite)
2306                         branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2307                     else
2308                         jump(beginOp.m_reentry);
2309                     
2310                     op.m_reentry = label();
2311                 } else if (term->quantityType == QuantifierNonGreedy) {
2312                     YarrOp& beginOp = m_ops[op.m_previousOp];
2313                     beginOp.m_jumps.link(this);
2314                 }
2315 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2316                 RELEASE_ASSERT_NOT_REACHED();
2317 #endif
2318                 break;
2319             }
2320
2321             // OpParentheticalAssertionBegin/End
2322             case OpParentheticalAssertionBegin: {
2323                 PatternTerm* term = op.m_term;
2324
2325                 // Store the current index - assertions should not update index, so
2326                 // we will need to restore it upon a successful match.
2327                 unsigned parenthesesFrameLocation = term->frameLocation;
2328                 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2329
2330                 // Check: adjust the input index by the already-checked offset for this term.
2331                 op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2332                 if (op.m_checkAdjust)
2333                     sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2334
2335                 m_checkedOffset -= op.m_checkAdjust;
2336                 break;
2337             }
2338             case OpParentheticalAssertionEnd: {
2339                 PatternTerm* term = op.m_term;
2340
2341                 // Restore the input index value.
2342                 unsigned parenthesesFrameLocation = term->frameLocation;
2343                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index);
2344
2345                 // If inverted, a successful match of the assertion must be treated
2346                 // as a failure, so jump to backtracking.
2347                 if (term->invert()) {
2348                     op.m_jumps.append(jump());
2349                     op.m_reentry = label();
2350                 }
2351
2352                 YarrOp& lastOp = m_ops[op.m_previousOp];
2353                 m_checkedOffset += lastOp.m_checkAdjust;
2354                 break;
2355             }
2356
2357             case OpMatchFailed:
2358                 removeCallFrame();
2359                 generateFailReturn();
2360                 break;
2361             }
2362
2363             ++opIndex;
2364         } while (opIndex < m_ops.size());
2365     }
2366
2367     void backtrack()
2368     {
2369         // Backwards generate the backtracking code.
2370         size_t opIndex = m_ops.size();
2371         ASSERT(opIndex);
2372
2373         do {
2374             --opIndex;
2375             YarrOp& op = m_ops[opIndex];
2376             switch (op.m_op) {
2377
2378             case OpTerm:
2379                 backtrackTerm(opIndex);
2380                 break;
2381
2382             // OpBodyAlternativeBegin/Next/End
2383             //
2384             // For each Begin/Next node representing an alternative, we need to decide what to do
2385             // in two circumstances:
2386             //  - If we backtrack back into this node, from within the alternative.
2387             //  - If the input check at the head of the alternative fails (if this exists).
2388             //
2389             // We treat these two cases differently since in the former case we have slightly
2390             // more information - since we are backtracking out of a prior alternative we know
2391             // that at least enough input was available to run it. For example, given the regular
2392             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
2393             // character match of 'a'), then we need not perform an additional input availability
2394             // check before running the second alternative.
2395             //
2396             // Backtracking required differs for the last alternative, which in the case of the
2397             // repeating set of alternatives must loop. The code generated for the last alternative
2398             // will also be used to handle all input check failures from any prior alternatives -
2399             // these require similar functionality, in seeking the next available alternative for
2400             // which there is sufficient input.
2401             //
2402             // Since backtracking of all other alternatives simply requires us to link backtracks
2403             // to the reentry point for the subsequent alternative, we will only be generating any
2404             // code when backtracking the last alternative.
2405             case OpBodyAlternativeBegin:
2406             case OpBodyAlternativeNext: {
2407                 PatternAlternative* alternative = op.m_alternative;
2408
2409                 if (op.m_op == OpBodyAlternativeNext) {
2410                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2411                     m_checkedOffset += priorAlternative->m_minimumSize;
2412                 }
2413                 m_checkedOffset -= alternative->m_minimumSize;
2414
2415                 // Is this the last alternative? If not, then if we backtrack to this point we just
2416                 // need to jump to try to match the next alternative.
2417                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2418                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2419                     break;
2420                 }
2421                 YarrOp& endOp = m_ops[op.m_nextOp];
2422
2423                 YarrOp* beginOp = &op;
2424                 while (beginOp->m_op != OpBodyAlternativeBegin) {
2425                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2426                     beginOp = &m_ops[beginOp->m_previousOp];
2427                 }
2428
2429                 bool onceThrough = endOp.m_nextOp == notFound;
2430                 
2431                 JumpList lastStickyAlternativeFailures;
2432
2433                 // First, generate code to handle cases where we backtrack out of an attempted match
2434                 // of the last alternative. If this is a 'once through' set of alternatives then we
2435                 // have nothing to do - link this straight through to the End.
2436                 if (onceThrough)
2437                     m_backtrackingState.linkTo(endOp.m_reentry, this);
2438                 else {
2439                     // If we don't need to move the input position, and the pattern has a fixed size
2440                     // (in which case we omit the store of the start index until the pattern has matched)
2441                     // then we can just link the backtrack out of the last alternative straight to the
2442                     // head of the first alternative.
2443                     if (m_pattern.m_body->m_hasFixedSize
2444                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2445                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
2446                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
2447                     else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2448                         // It is a sticky pattern and the last alternative failed, jump to the end.
2449                         m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2450                     } else {
2451                         // We need to generate a trampoline of code to execute before looping back
2452                         // around to the first alternative.
2453                         m_backtrackingState.link(this);
2454
2455                         // No need to advance and retry for a sticky pattern.
2456                         if (!m_pattern.sticky()) {
2457                             // If the pattern size is not fixed, then store the start index for use if we match.
2458                             if (!m_pattern.m_body->m_hasFixedSize) {
2459                                 if (alternative->m_minimumSize == 1)
2460                                     setMatchStart(index);
2461                                 else {
2462                                     move(index, regT0);
2463                                     if (alternative->m_minimumSize)
2464                                         sub32(Imm32(alternative->m_minimumSize - 1), regT0);
2465                                     else
2466                                         add32(TrustedImm32(1), regT0);
2467                                     setMatchStart(regT0);
2468                                 }
2469                             }
2470
2471                             // Generate code to loop. Check whether the last alternative is longer than the
2472                             // first (e.g. /a|xy/ or /a|xyz/).
2473                             if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2474                                 // We want to loop, and increment input position. If the delta is 1, it is
2475                                 // already correctly incremented, if more than one then decrement as appropriate.
2476                                 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2477                                 ASSERT(delta);
2478                                 if (delta != 1)
2479                                     sub32(Imm32(delta - 1), index);
2480                                 jump(beginOp->m_reentry);
2481                             } else {
2482                                 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
2483                                 // be sufficient input available to handle this, so just fall through.
2484                                 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2485                                 if (delta != 0xFFFFFFFFu) {
2486                                     // We need to check input because we are incrementing the input.
2487                                     add32(Imm32(delta + 1), index);
2488                                     checkInput().linkTo(beginOp->m_reentry, this);
2489                                 }
2490                             }
2491                         }
2492                     }
2493                 }
2494
2495                 // We can reach this point in the code in two ways:
2496                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
2497                 //    last alternative, and did not have sufficient input to run the first).
2498                 //  - We will loop back up to the following label when a repeating alternative loops,
2499                 //    following a failed input check.
2500                 //
2501                 // Either way, we have just failed the input check for the first alternative.
2502                 Label firstInputCheckFailed(this);
2503
2504                 // Generate code to handle input check failures from alternatives except the last.
2505                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
2506                 // nextOp is the alternative we will be attempting to reenter into.
2507                 // 
2508                 // We will link input check failures from the forwards matching path back to the code
2509                 // that can handle them.
2510                 YarrOp* prevOp = beginOp;
2511                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
2512                 while (nextOp->m_op != OpBodyAlternativeEnd) {
2513                     prevOp->m_jumps.link(this);
2514
2515                     // We only get here if an input check fails, it is only worth checking again
2516                     // if the next alternative has a minimum size less than the last.
2517                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
2518                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
2519                         // subtract delta back out, and reduce this code. Should performance test
2520                         // the benefit of this.
2521                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
2522                         sub32(Imm32(delta), index);
2523                         Jump fail = jumpIfNoAvailableInput();
2524                         add32(Imm32(delta), index);
2525                         jump(nextOp->m_reentry);
2526                         fail.link(this);
2527                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
2528                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
2529                     prevOp = nextOp;
2530                     nextOp = &m_ops[nextOp->m_nextOp];
2531                 }
2532
2533                 // We fall through to here if there is insufficient input to run the last alternative.
2534
2535                 // If there is insufficient input to run the last alternative, then for 'once through'
2536                 // alternatives we are done - just jump back up into the forwards matching path at the End.
2537                 if (onceThrough) {
2538                     op.m_jumps.linkTo(endOp.m_reentry, this);
2539                     jump(endOp.m_reentry);
2540                     break;
2541                 }
2542
2543                 // For repeating alternatives, link any input check failure from the last alternative to
2544                 // this point.
2545                 op.m_jumps.link(this);
2546
2547                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
2548
2549                 // Check for cases where input position is already incremented by 1 for the last
2550                 // alternative (this is particularly useful where the minimum size of the body
2551                 // disjunction is 0, e.g. /a*|b/).
2552                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
2553                     // index is already incremented by 1, so just store it now!
2554                     setMatchStart(index);
2555                     needsToUpdateMatchStart = false;
2556                 }
2557
2558                 if (!m_pattern.sticky()) {
2559                     // Check whether there is sufficient input to loop. Increment the input position by
2560                     // one, and check. Also add in the minimum disjunction size before checking - there
2561                     // is no point in looping if we're just going to fail all the input checks around
2562                     // the next iteration.
2563                     ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
2564                     if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
2565                         // If the last alternative had the same minimum size as the disjunction,
2566                         // just simply increment input pos by 1, no adjustment based on minimum size.
2567                         add32(TrustedImm32(1), index);
2568                     } else {
2569                         // If the minimum for the last alternative was one greater than that
2570                         // for the disjunction, we're already progressed by 1, nothing to do!
2571                         unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
2572                         if (delta)
2573                             sub32(Imm32(delta), index);
2574                     }
2575                     Jump matchFailed = jumpIfNoAvailableInput();
2576
2577                     if (needsToUpdateMatchStart) {
2578                         if (!m_pattern.m_body->m_minimumSize)
2579                             setMatchStart(index);
2580                         else {
2581                             move(index, regT0);
2582                             sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
2583                             setMatchStart(regT0);
2584                         }
2585                     }
2586
2587                     // Calculate how much more input the first alternative requires than the minimum
2588                     // for the body as a whole. If no more is needed then we don't need an additional
2589                     // input check here - jump straight back up to the start of the first alternative.
2590                     if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
2591                         jump(beginOp->m_reentry);
2592                     else {
2593                         if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
2594                             add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
2595                         else
2596                             sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
2597                         checkInput().linkTo(beginOp->m_reentry, this);
2598                         jump(firstInputCheckFailed);
2599                     }
2600
2601                     // We jump to here if we iterate to the point that there is insufficient input to
2602                     // run any matches, and need to return a failure state from JIT code.
2603                     matchFailed.link(this);
2604                 }
2605
2606                 lastStickyAlternativeFailures.link(this);
2607                 removeCallFrame();
2608                 generateFailReturn();
2609                 break;
2610             }
2611             case OpBodyAlternativeEnd: {
2612                 // We should never backtrack back into a body disjunction.
2613                 ASSERT(m_backtrackingState.isEmpty());
2614
2615                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2616                 m_checkedOffset += priorAlternative->m_minimumSize;
2617                 break;
2618             }
2619
2620             // OpSimpleNestedAlternativeBegin/Next/End
2621             // OpNestedAlternativeBegin/Next/End
2622             //
2623             // Generate code for when we backtrack back out of an alternative into
2624             // a Begin or Next node, or when the entry input count check fails. If
2625             // there are more alternatives we need to jump to the next alternative,
2626             // if not we backtrack back out of the current set of parentheses.
2627             //
2628             // In the case of non-simple nested assertions we need to also link the
2629             // 'return address' appropriately to backtrack back out into the correct
2630             // alternative.
2631             case OpSimpleNestedAlternativeBegin:
2632             case OpSimpleNestedAlternativeNext:
2633             case OpNestedAlternativeBegin:
2634             case OpNestedAlternativeNext: {
2635                 YarrOp& nextOp = m_ops[op.m_nextOp];
2636                 bool isBegin = op.m_previousOp == notFound;
2637                 bool isLastAlternative = nextOp.m_nextOp == notFound;
2638                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
2639                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
2640
2641                 // Treat an input check failure the same as a failed match.
2642                 m_backtrackingState.append(op.m_jumps);
2643
2644                 // Set the backtracks to jump to the appropriate place. We may need
2645                     // to link the backtracks in one of three different ways depending on
2646                     // the type of alternative we are dealing with:
2647                     //  - A single alternative, with no siblings.
2648                 //  - The last alternative of a set of two or more.
2649                 //  - An alternative other than the last of a set of two or more.
2650                 //
2651                 // In the case of a single alternative on its own, we don't need to
2652                 // jump anywhere - if the alternative fails to match we can just
2653                 // continue to backtrack out of the parentheses without jumping.
2654                 //
2655                 // In the case of the last alternative in a set of more than one, we
2656                 // need to jump to return back out to the beginning. We'll do so by
2657                 // adding a jump to the End node's m_jumps list, and linking this
2658                 // when we come to generate the Begin node. For alternatives other
2659                 // than the last, we need to jump to the next alternative.
2660                 //
2661                 // If the alternative had adjusted the input position we must link
2662                 // backtracking to here, correct, and then jump on. If not we can
2663                 // link the backtracks directly to their destination.
2664                 if (op.m_checkAdjust) {
2665                     // Handle the cases where we need to link the backtracks here.
2666                     m_backtrackingState.link(this);
2667                     sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2668                     if (!isLastAlternative) {
2669                         // An alternative that is not the last should jump to its successor.
2670                         jump(nextOp.m_reentry);
2671                     } else if (!isBegin) {
2672                         // The last of more than one alternatives must jump back to the beginning.
2673                         nextOp.m_jumps.append(jump());
2674                     } else {
2675                         // A single alternative on its own can fall through.
2676                         m_backtrackingState.fallthrough();
2677                     }
2678                 } else {
2679                     // Handle the cases where we can link the backtracks directly to their destinations.
2680                     if (!isLastAlternative) {
2681                         // An alternative that is not the last should jump to its successor.
2682                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
2683                     } else if (!isBegin) {
2684                         // The last of more than one alternatives must jump back to the beginning.
2685                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
2686                     }
2687                     // In the case of a single alternative on its own do nothing - it can fall through.
2688                 }
2689
2690                 // If there is a backtrack jump from a zero length match link it here.
2691                 if (op.m_zeroLengthMatch.isSet())
2692                     m_backtrackingState.append(op.m_zeroLengthMatch);
2693
2694                 // At this point we've handled the backtracking back into this node.
2695                 // Now link any backtracks that need to jump to here.
2696
2697                 // For non-simple alternatives, link the alternative's 'return address'
2698                 // so that we backtrack back out into the previous alternative.
2699                 if (op.m_op == OpNestedAlternativeNext)
2700                     m_backtrackingState.append(op.m_returnAddress);
2701
2702                 // If there is more than one alternative, then the last alternative will
2703                 // have planted a jump to be linked to the end. This jump was added to the
2704                 // End node's m_jumps list. If we are back at the beginning, link it here.
2705                 if (isBegin) {
2706                     YarrOp* endOp = &m_ops[op.m_nextOp];
2707                     while (endOp->m_nextOp != notFound) {
2708                         ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2709                         endOp = &m_ops[endOp->m_nextOp];
2710                     }
2711                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2712                     m_backtrackingState.append(endOp->m_jumps);
2713                 }
2714
2715                 if (!isBegin) {
2716                     YarrOp& lastOp = m_ops[op.m_previousOp];
2717                     m_checkedOffset += lastOp.m_checkAdjust;
2718                 }
2719                 m_checkedOffset -= op.m_checkAdjust;
2720                 break;
2721             }
2722             case OpSimpleNestedAlternativeEnd:
2723             case OpNestedAlternativeEnd: {
2724                 PatternTerm* term = op.m_term;
2725
2726                 // If there is a backtrack jump from a zero length match link it here.
2727                 if (op.m_zeroLengthMatch.isSet())
2728                     m_backtrackingState.append(op.m_zeroLengthMatch);
2729
2730                 // If we backtrack into the end of a simple subpattern do nothing;
2731                 // just continue through into the last alternative. If we backtrack
2732                 // into the end of a non-simple set of alternatives we need to jump
2733                 // to the backtracking return address set up during generation.
2734                 if (op.m_op == OpNestedAlternativeEnd) {
2735                     m_backtrackingState.link(this);
2736
2737                     // Plant a jump to the return address.
2738                     unsigned parenthesesFrameLocation = term->frameLocation;
2739                     loadFromFrameAndJump(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2740
2741                     // Link the DataLabelPtr associated with the end of the last
2742                     // alternative to this point.
2743                     m_backtrackingState.append(op.m_returnAddress);
2744                 }
2745
2746                 YarrOp& lastOp = m_ops[op.m_previousOp];
2747                 m_checkedOffset += lastOp.m_checkAdjust;
2748                 break;
2749             }
2750
2751             // OpParenthesesSubpatternOnceBegin/End
2752             //
2753             // When we are backtracking back out of a capturing subpattern we need
2754             // to clear the start index in the matches output array, to record that
2755             // this subpattern has not been captured.
2756             //
2757             // When backtracking back out of a Greedy quantified subpattern we need
2758             // to catch this, and try running the remainder of the alternative after
2759             // the subpattern again, skipping the parentheses.
2760             //
2761             // Upon backtracking back into a quantified set of parentheses we need to
2762             // check whether we were currently skipping the subpattern. If not, we
2763             // can backtrack into them, if we were we need to either backtrack back
2764             // out of the start of the parentheses, or jump back to the forwards
2765             // matching start, depending on whether the match is Greedy or NonGreedy.
2766             case OpParenthesesSubpatternOnceBegin: {
2767                 PatternTerm* term = op.m_term;
2768                 ASSERT(term->quantityMaxCount == 1);
2769
2770                 // We only need to backtrack to this point if capturing or greedy.
2771                 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
2772                     m_backtrackingState.link(this);
2773
2774                     // If capturing, clear the capture (we only need to reset start).
2775                     if (term->capture() && compileMode == IncludeSubpatterns)
2776                         clearSubpatternStart(term->parentheses.subpatternId);
2777
2778                     // If Greedy, jump to the end.
2779                     if (term->quantityType == QuantifierGreedy) {
2780                         // Clear the flag in the stackframe indicating we ran through the subpattern.
2781                         unsigned parenthesesFrameLocation = term->frameLocation;
2782                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2783                         // Jump to after the parentheses, skipping the subpattern.
2784                         jump(m_ops[op.m_nextOp].m_reentry);
2785                         // A backtrack from after the parentheses, when skipping the subpattern,
2786                         // will jump back to here.
2787                         op.m_jumps.link(this);
2788                     }
2789
2790                     m_backtrackingState.fallthrough();
2791                 }
2792                 break;
2793             }
2794             case OpParenthesesSubpatternOnceEnd: {
2795                 PatternTerm* term = op.m_term;
2796
2797                 if (term->quantityType != QuantifierFixedCount) {
2798                     m_backtrackingState.link(this);
2799
2800                     // Check whether we should backtrack back into the parentheses, or if we
2801                     // are currently in a state where we had skipped over the subpattern
2802                     // (in which case the flag value on the stack will be -1).
2803                     unsigned parenthesesFrameLocation = term->frameLocation;
2804                     Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
2805
2806                     if (term->quantityType == QuantifierGreedy) {
2807                         // For Greedy parentheses, we skip after having already tried going
2808                         // through the subpattern, so if we get here we're done.
2809                         YarrOp& beginOp = m_ops[op.m_previousOp];
2810                         beginOp.m_jumps.append(hadSkipped);
2811                     } else {
2812                         // For NonGreedy parentheses, we try skipping the subpattern first,
2813                         // so if we get here we need to try running through the subpattern
2814                         // next. Jump back to the start of the parentheses in the forwards
2815                         // matching path.
2816                         ASSERT(term->quantityType == QuantifierNonGreedy);
2817                         YarrOp& beginOp = m_ops[op.m_previousOp];
2818                         hadSkipped.linkTo(beginOp.m_reentry, this);
2819                     }
2820
2821                     m_backtrackingState.fallthrough();
2822                 }
2823
2824                 m_backtrackingState.append(op.m_jumps);
2825                 break;
2826             }
2827
2828             // OpParenthesesSubpatternTerminalBegin/End
2829             //
2830             // Terminal subpatterns will always match - there is nothing after them to
2831             // force a backtrack, and they have a minimum count of 0, and as such will
2832             // always produce an acceptable result.
2833             case OpParenthesesSubpatternTerminalBegin: {
2834                 // We will backtrack to this point once the subpattern cannot match any
2835                 // more. Since no match is accepted as a successful match (we are Greedy
2836                 // quantified with a minimum of zero) jump back to the forwards matching
2837                 // path at the end.
2838                 YarrOp& endOp = m_ops[op.m_nextOp];
2839                 m_backtrackingState.linkTo(endOp.m_reentry, this);
2840                 break;
2841             }
2842             case OpParenthesesSubpatternTerminalEnd:
2843                 // We should never be backtracking to here (hence the 'terminal' in the name).
2844                 ASSERT(m_backtrackingState.isEmpty());
2845                 m_backtrackingState.append(op.m_jumps);
2846                 break;
2847
2848             // OpParenthesesSubpatternBegin/End
2849             //
2850             // When we are backtracking back out of a capturing subpattern we need
2851             // to clear the start index in the matches output array, to record that
2852             // this subpattern has not been captured.
2853             //
2854             // When backtracking back out of a Greedy quantified subpattern we need
2855             // to catch this, and try running the remainder of the alternative after
2856             // the subpattern again, skipping the parentheses.
2857             //
2858             // Upon backtracking back into a quantified set of parentheses we need to
2859             // check whether we were currently skipping the subpattern. If not, we
2860             // can backtrack into them, if we were we need to either backtrack back
2861             // out of the start of the parentheses, or jump back to the forwards
2862             // matching start, depending on whether the match is Greedy or NonGreedy.
2863             case OpParenthesesSubpatternBegin: {
2864 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2865                 PatternTerm* term = op.m_term;
2866                 unsigned parenthesesFrameLocation = term->frameLocation;
2867
2868                 if (term->quantityType != QuantifierFixedCount) {
2869                     m_backtrackingState.link(this);
2870
2871                     if (term->quantityType == QuantifierGreedy) {
2872                         RegisterID currParenContextReg = regT0;
2873                         RegisterID newParenContextReg = regT1;
2874
2875                         loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
2876
2877                         restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
2878
2879                         freeParenContext(currParenContextReg, newParenContextReg);
2880                         storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2881                         const RegisterID countTemporary = regT0;
2882                         loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2883                         Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
2884
2885                         sub32(TrustedImm32(1), countTemporary);
2886                         storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2887
2888                         jump(m_ops[op.m_nextOp].m_reentry);
2889
2890                         zeroLengthMatch.link(this);
2891
2892                         // Clear the flag in the stackframe indicating we didn't run through the subpattern.
2893                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2894
2895                         jump(m_ops[op.m_nextOp].m_reentry);
2896                     }
2897
2898                     // If Greedy, jump to the end.
2899                     if (term->quantityType == QuantifierGreedy) {
2900                         // A backtrack from after the parentheses, when skipping the subpattern,
2901                         // will jump back to here.
2902                         op.m_jumps.link(this);
2903                     }
2904
2905                     m_backtrackingState.fallthrough();
2906                 }
2907 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2908                 RELEASE_ASSERT_NOT_REACHED();
2909 #endif
2910                 break;
2911             }
2912             case OpParenthesesSubpatternEnd: {
2913 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2914                 PatternTerm* term = op.m_term;
2915
2916                 if (term->quantityType != QuantifierFixedCount) {
2917                     m_backtrackingState.link(this);
2918
2919                     // Check whether we should backtrack back into the parentheses, or if we
2920                     // are currently in a state where we had skipped over the subpattern
2921                     // (in which case the flag value on the stack will be -1).
2922                     unsigned parenthesesFrameLocation = term->frameLocation;
2923                     Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation  + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
2924
2925                     if (term->quantityType == QuantifierGreedy) {
2926                         // For Greedy parentheses, we skip after having already tried going
2927                         // through the subpattern, so if we get here we're done.
2928                         YarrOp& beginOp = m_ops[op.m_previousOp];
2929                         beginOp.m_jumps.append(hadSkipped);
2930                     } else {
2931                         // For NonGreedy parentheses, we try skipping the subpattern first,
2932                         // so if we get here we need to try running through the subpattern
2933                         // next. Jump back to the start of the parentheses in the forwards
2934                         // matching path.
2935                         ASSERT(term->quantityType == QuantifierNonGreedy);
2936                         YarrOp& beginOp = m_ops[op.m_previousOp];
2937                         hadSkipped.linkTo(beginOp.m_reentry, this);
2938                     }
2939
2940                     m_backtrackingState.fallthrough();
2941                 }
2942
2943                 m_backtrackingState.append(op.m_jumps);
2944 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2945                 RELEASE_ASSERT_NOT_REACHED();
2946 #endif
2947                 break;
2948             }
2949
2950             // OpParentheticalAssertionBegin/End
2951             case OpParentheticalAssertionBegin: {
2952                 PatternTerm* term = op.m_term;
2953                 YarrOp& endOp = m_ops[op.m_nextOp];
2954
2955                 // We need to handle the backtracks upon backtracking back out
2956                 // of a parenthetical assertion if either we need to correct
2957                 // the input index, or the assertion was inverted.
2958                 if (op.m_checkAdjust || term->invert()) {
2959                      m_backtrackingState.link(this);
2960
2961                     if (op.m_checkAdjust)
2962                         add32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2963
2964                     // In an inverted assertion failure to match the subpattern
2965                     // is treated as a successful match - jump to the end of the
2966                     // subpattern. We already have adjusted the input position
2967                     // back to that before the assertion, which is correct.
2968                     if (term->invert())
2969                         jump(endOp.m_reentry);
2970
2971                     m_backtrackingState.fallthrough();
2972                 }
2973
2974                 // The End node's jump list will contain any backtracks into
2975                 // the end of the assertion. Also, if inverted, we will have
2976                 // added the failure caused by a successful match to this.
2977                 m_backtrackingState.append(endOp.m_jumps);
2978
2979                 m_checkedOffset += op.m_checkAdjust;
2980                 break;
2981             }
2982             case OpParentheticalAssertionEnd: {
2983                 // FIXME: We should really be clearing any nested subpattern
2984                 // matches on bailing out from after the pattern. Firefox has
2985                 // this bug too (presumably because they use YARR!)
2986
2987                 // Never backtrack into an assertion; later failures bail to before the begin.
2988                 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2989
2990                 YarrOp& lastOp = m_ops[op.m_previousOp];
2991                 m_checkedOffset -= lastOp.m_checkAdjust;
2992                 break;
2993             }
2994
2995             case OpMatchFailed:
2996                 break;
2997             }
2998
2999         } while (opIndex);
3000     }
3001
3002     // Compilation methods:
3003     // ====================
3004
3005     // opCompileParenthesesSubpattern
3006     // Emits ops for a subpattern (set of parentheses). These consist
3007     // of a set of alternatives wrapped in an outer set of nodes for
3008     // the parentheses.
3009     // Supported types of parentheses are 'Once' (quantityMaxCount == 1),
3010     // 'Terminal' (non-capturing parentheses quantified as greedy
3011     // and infinite), and 0 based greedy quantified parentheses.
3012     // Alternatives will use the 'Simple' set of ops if either the
3013     // subpattern is terminal (in which case we will never need to
3014     // backtrack), or if the subpattern only contains one alternative.
3015     void opCompileParenthesesSubpattern(PatternTerm* term)
3016     {
3017         YarrOpCode parenthesesBeginOpCode;
3018         YarrOpCode parenthesesEndOpCode;
3019         YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
3020         YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
3021         YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
3022
3023         // We can currently only compile quantity 1 subpatterns that are
3024         // not copies. We generate a copy in the case of a range quantifier,
3025         // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
3026         // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
3027         // comes where the subpattern is capturing, in which case we would
3028         // need to restore the capture from the first subpattern upon a
3029         // failure in the second.
3030         if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
3031             m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
3032             return;
3033         } if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
3034             // Select the 'Once' nodes.
3035             parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
3036             parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
3037
3038             // If there is more than one alternative we cannot use the 'simple' nodes.
3039             if (term->parentheses.disjunction->m_alternatives.size() != 1) {
3040                 alternativeBeginOpCode = OpNestedAlternativeBegin;
3041                 alternativeNextOpCode = OpNestedAlternativeNext;
3042                 alternativeEndOpCode = OpNestedAlternativeEnd;
3043             }
3044         } else if (term->parentheses.isTerminal) {
3045             // Select the 'Terminal' nodes.
3046             parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
3047             parenthesesEndOpCode = OpParenthesesSubpattern