[JSC] Micro-optimize YarrJIT's surrogate pair handling
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrJIT.cpp
1 /*
2  * Copyright (C) 2009-2018 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27 #include "YarrJIT.h"
28
29 #include <wtf/ASCIICType.h>
30 #include "LinkBuffer.h"
31 #include "Options.h"
32 #include "VM.h"
33 #include "Yarr.h"
34 #include "YarrCanonicalize.h"
35 #include "YarrDisassembler.h"
36
37 #if ENABLE(YARR_JIT)
38
39 namespace JSC { namespace Yarr {
40
41 template<YarrJITCompileMode compileMode>
42 class YarrGenerator : public YarrJITInfo, private MacroAssembler {
43
44 #if CPU(ARM_THUMB2)
45     static const RegisterID input = ARMRegisters::r0;
46     static const RegisterID index = ARMRegisters::r1;
47     static const RegisterID length = ARMRegisters::r2;
48     static const RegisterID output = ARMRegisters::r3;
49
50     static const RegisterID regT0 = ARMRegisters::r4;
51     static const RegisterID regT1 = ARMRegisters::r5;
52     static const RegisterID initialStart = ARMRegisters::r8;
53
54     static const RegisterID returnRegister = ARMRegisters::r0;
55     static const RegisterID returnRegister2 = ARMRegisters::r1;
56
57 #define HAVE_INITIAL_START_REG
58 #elif CPU(ARM64)
59     // Argument registers
60     static const RegisterID input = ARM64Registers::x0;
61     static const RegisterID index = ARM64Registers::x1;
62     static const RegisterID length = ARM64Registers::x2;
63     static const RegisterID output = ARM64Registers::x3;
64     static const RegisterID freelistRegister = ARM64Registers::x4;
65     static const RegisterID freelistSizeRegister = ARM64Registers::x5;
66
67     // Scratch registers
68     static const RegisterID regT0 = ARM64Registers::x6;
69     static const RegisterID regT1 = ARM64Registers::x7;
70     static const RegisterID regT2 = ARM64Registers::x8;
71     static const RegisterID remainingMatchCount = ARM64Registers::x9;
72     static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
73     static const RegisterID initialStart = ARM64Registers::x11;
74     static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
75     static const RegisterID leadingSurrogateTag = ARM64Registers::x13;
76     static const RegisterID trailingSurrogateTag = ARM64Registers::x14;
77     static const RegisterID endOfStringAddress = ARM64Registers::x15;
78
79     static const RegisterID returnRegister = ARM64Registers::x0;
80     static const RegisterID returnRegister2 = ARM64Registers::x1;
81
82     const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
83 #define HAVE_INITIAL_START_REG
84 #define JIT_UNICODE_EXPRESSIONS
85 #elif CPU(MIPS)
86     static const RegisterID input = MIPSRegisters::a0;
87     static const RegisterID index = MIPSRegisters::a1;
88     static const RegisterID length = MIPSRegisters::a2;
89     static const RegisterID output = MIPSRegisters::a3;
90
91     static const RegisterID regT0 = MIPSRegisters::t4;
92     static const RegisterID regT1 = MIPSRegisters::t5;
93     static const RegisterID initialStart = MIPSRegisters::t6;
94
95     static const RegisterID returnRegister = MIPSRegisters::v0;
96     static const RegisterID returnRegister2 = MIPSRegisters::v1;
97
98 #define HAVE_INITIAL_START_REG
99 #elif CPU(X86)
100     static const RegisterID input = X86Registers::eax;
101     static const RegisterID index = X86Registers::edx;
102     static const RegisterID length = X86Registers::ecx;
103     static const RegisterID output = X86Registers::edi;
104
105     static const RegisterID regT0 = X86Registers::ebx;
106     static const RegisterID regT1 = X86Registers::esi;
107
108     static const RegisterID returnRegister = X86Registers::eax;
109     static const RegisterID returnRegister2 = X86Registers::edx;
110 #elif CPU(X86_64)
111 #if !OS(WINDOWS)
112     // Argument registers
113     static const RegisterID input = X86Registers::edi;
114     static const RegisterID index = X86Registers::esi;
115     static const RegisterID length = X86Registers::edx;
116     static const RegisterID output = X86Registers::ecx;
117     static const RegisterID freelistRegister = X86Registers::r8;
118     static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
119 #else
120     // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
121     // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
122     COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
123     static const RegisterID input = X86Registers::edx;
124     static const RegisterID index = X86Registers::r8;
125     static const RegisterID length = X86Registers::r9;
126     static const RegisterID output = X86Registers::r10;
127 #endif
128
129     // Scratch registers
130     static const RegisterID regT0 = X86Registers::eax;
131 #if !OS(WINDOWS)
132     static const RegisterID regT1 = X86Registers::r9;
133     static const RegisterID regT2 = X86Registers::r10;
134 #else
135     static const RegisterID regT1 = X86Registers::ecx;
136     static const RegisterID regT2 = X86Registers::edi;
137 #endif
138
139     static const RegisterID initialStart = X86Registers::ebx;
140 #if !OS(WINDOWS)
141     static const RegisterID remainingMatchCount = X86Registers::r12;
142 #else
143     static const RegisterID remainingMatchCount = X86Registers::esi;
144 #endif
145     static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
146     static const RegisterID leadingSurrogateTag = X86Registers::r14;
147     static const RegisterID endOfStringAddress = X86Registers::r15;
148
149     static const RegisterID returnRegister = X86Registers::eax;
150     static const RegisterID returnRegister2 = X86Registers::edx;
151
152     const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
153     const TrustedImm32 trailingSurrogateTag = TrustedImm32(0xdc00);
154     const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
155 #define HAVE_INITIAL_START_REG
156 #define JIT_UNICODE_EXPRESSIONS
157 #endif
158
159 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
160     struct ParenContextSizes {
161         size_t m_numSubpatterns;
162         size_t m_frameSlots;
163
164         ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
165             : m_numSubpatterns(numSubpatterns)
166             , m_frameSlots(frameSlots)
167         {
168         }
169
170         size_t numSubpatterns() { return m_numSubpatterns; }
171
172         size_t frameSlots() { return m_frameSlots; }
173     };
174
    // A variable-length record used to save and restore the state of a
    // parenthesized group while matching. Instances are carved out of the
    // caller-supplied pattern context buffer by initParenContextFreeList()
    // and recycled through a singly linked free list (allocateParenContext()
    // / freeParenContext()).
    struct ParenContext {
        struct ParenContext* next; // Free-list / saved-context chain link.
        uint32_t begin; // Value of 'index' when the group started matching.
        uint32_t matchAmount; // Iteration count for quantified groups.
        uintptr_t returnAddress; // Saved backtrack return address.
        // Trailing variable-length data (zero-length-array idiom): first one
        // (start, end) pair per saved subpattern, then the saved frame slots.
        struct Subpatterns {
            unsigned start;
            unsigned end;
        } subpatterns[0];
        uintptr_t frameSlots[0];

        // Total allocation size: fixed header plus both variable tails.
        static size_t sizeFor(ParenContextSizes& parenContextSizes)
        {
            return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
        }

        static ptrdiff_t nextOffset()
        {
            return offsetof(ParenContext, next);
        }

        static ptrdiff_t beginOffset()
        {
            return offsetof(ParenContext, begin);
        }

        static ptrdiff_t matchAmountOffset()
        {
            return offsetof(ParenContext, matchAmount);
        }

        static ptrdiff_t returnAddressOffset()
        {
            return offsetof(ParenContext, returnAddress);
        }

        // Subpattern numbers are 1-based, hence the (subpattern - 1).
        static ptrdiff_t subpatternOffset(size_t subpattern)
        {
            return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
        }

        // The saved frame slots live immediately after the subpatterns tail.
        static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
        {
            return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
        }
    };
221
    // Emits code that carves the pattern context buffer (freelistRegister /
    // freelistSizeRegister) into ParenContext-sized chunks and links them
    // into a null-terminated free list headed by freelistRegister.
    void initParenContextFreeList()
    {
        RegisterID parenContextPointer = regT0;
        RegisterID nextParenContextPointer = regT2;

        size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);

        // Keep each chunk pointer-aligned.
        parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);

        // If even a single context cannot fit in the buffer, refuse to JIT.
        if (parenContextSize > VM::patternContextBufferSize) {
            m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
            return;
        }

        // A null buffer leaves the free list empty.
        Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
        move(freelistRegister, parenContextPointer);
        addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
        // Turn freelistSizeRegister into the highest address at which a whole
        // chunk still fits: bufferStart + bufferSize - chunkSize.
        addPtr(freelistRegister, freelistSizeRegister);
        subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);

        // Link successive chunks until the next one would overrun the buffer.
        Label loopTop(this);
        Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
        storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
        move(nextParenContextPointer, parenContextPointer);
        addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
        jump(loopTop);

        initDone.link(this);
        // Null-terminate the list at the final chunk.
        storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
        emptyFreeList.link(this);
    }
253
    // Emits code that pops the head ParenContext off the free list into
    // 'result'. Appends failure branches: aborts execution if the list is
    // exhausted, and takes the hit-match-limit exit once remainingMatchCount
    // is decremented to zero.
    void allocateParenContext(RegisterID result)
    {
        m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
        sub32(TrustedImm32(1), remainingMatchCount);
        m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
        move(freelistRegister, result);
        loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
    }
262
    // Emits code that pushes the context at headPtrRegister back onto the
    // free list; the context's successor (loaded from its next link) is left
    // in newHeadPtrRegister for the caller.
    void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
    {
        loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
        storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
        move(headPtrRegister, freelistRegister);
    }
269
    // Emits code that snapshots the in-flight state of a parenthesized group
    // into the given ParenContext: the current index, match count, backtrack
    // return address, the output ranges for subpatterns
    // [firstSubpattern, lastSubpattern] (cleared as they are saved), and the
    // frame slots above the group's own backtrack record.
    void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
    {
        store32(index, Address(parenContextReg, ParenContext::beginOffset()));
        loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
        store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
        loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
        storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
        if (compileMode == IncludeSubpatterns) {
            for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
                // The output buffer holds a (start, end) pair per subpattern.
                loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
                storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
                clearSubpatternStart(subpattern);
            }
        }
        // Save the frame slots that follow this group's own backtrack info.
        subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
        for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
            loadFromFrame(frameLocation, tempReg);
            storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
        }
    }
290
    // Inverse of saveParenContext(): emits code that reloads index, match
    // amount, backtrack return address, the subpattern output ranges, and the
    // saved frame slots from the given ParenContext.
    void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
    {
        load32(Address(parenContextReg, ParenContext::beginOffset()), index);
        storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
        load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
        storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
        loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
        storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
        if (compileMode == IncludeSubpatterns) {
            for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
                loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
                storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
            }
        }
        // Restore the frame slots that follow this group's own backtrack info.
        subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
        for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
            loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
            storeToFrame(tempReg, frameLocation);
        }
    }
311 #endif
312
313     void optimizeAlternative(PatternAlternative* alternative)
314     {
315         if (!alternative->m_terms.size())
316             return;
317
318         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
319             PatternTerm& term = alternative->m_terms[i];
320             PatternTerm& nextTerm = alternative->m_terms[i + 1];
321
322             // We can move BMP only character classes after fixed character terms.
323             if ((term.type == PatternTerm::TypeCharacterClass)
324                 && (term.quantityType == QuantifierFixedCount)
325                 && (!m_decodeSurrogatePairs || (term.characterClass->hasOneCharacterSize() && !term.m_invert))
326                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
327                 && (nextTerm.quantityType == QuantifierFixedCount)) {
328                 PatternTerm termCopy = term;
329                 alternative->m_terms[i] = nextTerm;
330                 alternative->m_terms[i + 1] = termCopy;
331             }
332         }
333     }
334
    // Emits a binary search over the sorted list 'ranges' (with the sorted
    // singleton list 'matches' interleaved by value) testing 'character'.
    // Jumps to matchDest on a hit, appends to 'failures' on a definite miss,
    // and otherwise falls through to keep narrowing the search.
    // NOTE(review): lo/hi are held in plain char; this path appears to serve
    // only the ASCII lists (m_ranges/m_matches) — non-ASCII values are routed
    // to m_rangesUnicode/m_matchesUnicode in matchCharacterClass() — confirm
    // before reusing this with wider ranges.
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
    {
        do {
            // pick which range we're going to generate
            int which = count >> 1;
            char lo = ranges[which].begin;
            char hi = ranges[which].end;

            // check if there are any ranges or matches below lo.  If not, just jl to failure -
            // if there is anything else to check, check that first, if it falls through jmp to failure.
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                // generate code for all ranges before this one
                if (which)
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);

                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                    ++*matchIndex;
                }
                failures.append(jump());

                loOrAbove.link(this);
            } else if (which) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
                failures.append(jump());

                loOrAbove.link(this);
            } else
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

            // Singleton matches inside [lo, hi] are subsumed by the range test below.
            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
                ++*matchIndex;

            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
            // fall through to here, the value is above hi.

            // shuffle along & loop around if there are any more matches to handle.
            unsigned next = which + 1;
            ranges += next;
            count -= next;
        } while (count);
    }
381
    // Emits code that jumps to matchDest if 'character' is in charClass, and
    // otherwise falls through. Uses a precomputed byte-table lookup when one
    // is available (and we are not decoding surrogate pairs); otherwise
    // generates explicit compares, splitting ASCII values from the Unicode
    // singleton/range lists at 0x7f.
    // Note: the case-folding path below may clobber 'character' (or32).
    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    {
        // Fast path: one indexed byte test against the class's bitmap table.
        if (charClass->m_table && !m_decodeSurrogatePairs) {
            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
            matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
            return;
        }

        JumpList unicodeFail;
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
            JumpList isAscii;
            if (charClass->m_matches.size() || charClass->m_ranges.size())
                isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f)));

            if (charClass->m_matchesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
                    UChar32 ch = charClass->m_matchesUnicode[i];
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
                }
            }

            if (charClass->m_rangesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
                    UChar32 lo = charClass->m_rangesUnicode[i].begin;
                    UChar32 hi = charClass->m_rangesUnicode[i].end;

                    // In-range iff not below lo and not above hi.
                    Jump below = branch32(LessThan, character, Imm32(lo));
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                    below.link(this);
                }
            }

            if (charClass->m_matches.size() || charClass->m_ranges.size())
                unicodeFail = jump();
            isAscii.link(this);
        }

        if (charClass->m_ranges.size()) {
            unsigned matchIndex = 0;
            JumpList failures;
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
            // Emit any singleton matches above the last range.
            while (matchIndex < charClass->m_matches.size())
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));

            failures.link(this);
        } else if (charClass->m_matches.size()) {
            // optimization: gather 'a','A' etc back together, can mask & test once.
            Vector<char> matchesAZaz;

            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
                char ch = charClass->m_matches[i];
                if (m_pattern.ignoreCase()) {
                    if (isASCIILower(ch)) {
                        matchesAZaz.append(ch);
                        continue;
                    }
                    if (isASCIIUpper(ch))
                        continue;
                }
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
            }

            if (unsigned countAZaz = matchesAZaz.size()) {
                // Fold to lowercase (ASCII) in place — clobbers 'character'.
                or32(TrustedImm32(32), character);
                for (unsigned i = 0; i < countAZaz; ++i)
                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
            }
        }

        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
            unicodeFail.link(this);
    }
454
455 #ifdef JIT_UNICODE_EXPRESSIONS
    // Emits code to advance 'index' past a just-matched character class term
    // in Unicode mode. When every character in the class has the same size we
    // advance by a constant (2 code units for non-BMP classes, else 1).
    // Otherwise the width depends on the character matched: advance one unit,
    // bail to 'failures' when already at the end of input, then advance one
    // more unit if the matched value was a supplementary-plane code point
    // (>= supplementaryPlanesBase, i.e. 0x10000).
    void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failures, const RegisterID character)
    {
        ASSERT(term->type == PatternTerm::TypeCharacterClass);

        if (term->characterClass->hasOneCharacterSize() && !term->invert())
            add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
        else {
            add32(TrustedImm32(1), index);
            failures.append(atEndOfInput());
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
        }
    }
470 #endif
471
    // Optionally advances index by countToCheck, then jumps if input is not
    // available (index > length). Note: on the failure path the index will
    // have (incorrectly) been incremented already — callers must account for
    // this.
    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
    {
        if (countToCheck)
            add32(Imm32(countToCheck), index);
        return branch32(Above, index, length);
    }
479
    // Advances index by countToCheck and jumps if that still leaves it within
    // the input (index <= length).
    Jump jumpIfAvailableInput(unsigned countToCheck)
    {
        add32(Imm32(countToCheck), index);
        return branch32(BelowOrEqual, index, length);
    }
485
    // Jumps if index + additionalAmount exceeds the input length.
    // Note: clobbers additionalAmount (it is overwritten with the sum).
    Jump checkNotEnoughInput(RegisterID additionalAmount)
    {
        add32(index, additionalAmount);
        return branch32(Above, additionalAmount, length);
    }
491
    // Jumps if the current index is still within the input (index <= length).
    Jump checkInput()
    {
        return branch32(BelowOrEqual, index, length);
    }
496
    // Jumps if the current index is exactly at the end of the input.
    Jump atEndOfInput()
    {
        return branch32(Equal, index, length);
    }
501
    // Jumps if the current index is not at the end of the input.
    Jump notAtEndOfInput()
    {
        return branch32(NotEqual, index, length);
    }
506
    // Builds a BaseIndex operand addressing the character at
    // (indexReg - negativeCharacterOffset), scaled for the current character
    // size. tempReg may be clobbered to hold an adjusted base pointer when
    // the offset is too large for BaseIndex's signed 32-bit displacement.
    BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
    {
        RegisterID base = input;

        // BaseIndex() addressing can take a int32_t offset. Given that we can have a regular
        // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
        // for addressing in extreme cases where we might underflow. Therefore we check to see if
        // negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
        // If so, we do our own address calculating by adjusting the base, using the result register
        // as a temp address register.
        unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff;
        unsigned offsetAdjustAmount = 0x40000000;
        if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
            base = tempReg;
            move(input, base);
            // Walk the base down in 1GB steps (twice per step for 16-bit
            // characters, since the offset is in characters, not bytes).
            while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
                subPtr(TrustedImm32(offsetAdjustAmount), base);
                if (m_charSize != Char8)
                    subPtr(TrustedImm32(offsetAdjustAmount), base);
                negativeCharacterOffset -= offsetAdjustAmount;
            }
        }

        Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));

        // Scale the (now in-range) offset by the character size.
        if (m_charSize == Char8)
            return BaseIndex(input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());

        return BaseIndex(input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
    }
537
538 #ifdef JIT_UNICODE_EXPRESSIONS
    // Reads the UTF-16 code unit addressed by regUnicodeInputAndTrail into
    // resultReg and, if it is a leading surrogate with an in-bounds trailing
    // surrogate after it, combines the pair into the full code point.
    // Otherwise resultReg is left holding the raw code unit (including a lone
    // surrogate). Clobbers regT2 and regUnicodeInputAndTrail.
    void tryReadUnicodeCharImpl(RegisterID resultReg)
    {
        ASSERT(m_charSize == Char16);

        JumpList notUnicode;

        load16Unaligned(regUnicodeInputAndTrail, resultReg);

        // Is the character a leading surrogate?
        and32(surrogateTagMask, resultReg, regT2);
        notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));

        // Is the input long enough to read a trailing surrogate?
        addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
        notUnicode.append(branchPtr(AboveOrEqual, regUnicodeInputAndTrail, endOfStringAddress));

        // Is the character a trailing surrogate?
        load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
        and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
        notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));

        // Combine leading and trailing surrogates to produce a code point:
        // (lead << 10) + trail - U16_SURROGATE_OFFSET, with the add/subtract
        // folded into a single effective-address (lea-style) computation.
        lshift32(TrustedImm32(10), resultReg);
        getEffectiveAddress(BaseIndex(resultReg, regUnicodeInputAndTrail, TimesOne, -U16_SURROGATE_OFFSET), resultReg);
        notUnicode.link(this);
    }
565
    // Reads (and possibly surrogate-pair-decodes) the UTF-16 character at
    // 'address' into resultReg. When the result lands in regT0 we emit a near
    // call, recorded in m_tryReadUnicodeCharacterCalls for later linking to a
    // shared tryReadUnicodeCharImpl body, instead of inlining the decode.
    void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
    {
        ASSERT(m_charSize == Char16);

        getEffectiveAddress(address, regUnicodeInputAndTrail);

        if (resultReg == regT0)
            m_tryReadUnicodeCharacterCalls.append(nearCall());
        else
            tryReadUnicodeCharImpl(resultReg);
    }
577 #endif
578
579     void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
580     {
581         BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
582         
583         if (m_charSize == Char8)
584             load8(address, resultReg);
585         else
586             load16Unaligned(address, resultReg);
587     }
588     
    // Loads the character at (indexReg - negativeCharacterOffset) into
    // resultReg: an 8-bit load for Char8 input, a surrogate-decoding read in
    // Unicode mode (on platforms with JIT_UNICODE_EXPRESSIONS), and otherwise
    // a plain 16-bit load.
    void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
    {
        BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);

        if (m_charSize == Char8)
            load8(address, resultReg);
#ifdef JIT_UNICODE_EXPRESSIONS
        else if (m_decodeSurrogatePairs)
            tryReadUnicodeChar(address, resultReg);
#endif
        else
            load16Unaligned(address, resultReg);
    }
602
    // Reads the character at (index - negativeCharacterOffset) into
    // 'character' and returns a jump taken when it does not equal 'ch'
    // (folding ASCII case when the pattern ignores case).
    Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
    {
        readCharacter(negativeCharacterOffset, character);

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
        if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
            // Fold both sides to lowercase so one compare covers both cases.
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        return branch32(NotEqual, character, Imm32(ch));
    }
617     
    // Spills 'reg' into the given YarrJIT frame slot (stack-relative poke).
    void storeToFrame(RegisterID reg, unsigned frameLocation)
    {
        poke(reg, frameLocation);
    }
622
    // Stores a 32-bit immediate into the given YarrJIT frame slot.
    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }
627
628 #if CPU(ARM64) || CPU(X86_64)
    // Stores a pointer-sized immediate into the given YarrJIT frame slot
    // (64-bit platforms only).
    void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }
633 #endif
634
    // Stores a placeholder (null) pointer into the given frame slot and
    // returns a DataLabelPtr so the real value can be patched in later.
    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
    {
        return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }
639
    // Reloads 'reg' from the given YarrJIT frame slot (stack-relative peek).
    void loadFromFrame(unsigned frameLocation, RegisterID reg)
    {
        peek(reg, frameLocation);
    }
644
    // Backtracking dispatch: far-jumps to the (YarrBacktrackPtrTag-signed)
    // return address stored in the given frame slot.
    void loadFromFrameAndJump(unsigned frameLocation)
    {
        farJump(Address(stackPointerRegister, frameLocation * sizeof(void*)), YarrBacktrackPtrTag);
    }
649
650     unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
651     {
652         if (!callFrameSize)
653             return 0;
654
655         callFrameSize *= sizeof(void*);
656         if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
657             CRASH();
658         callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
659         return callFrameSize;
660     }
    // Allocates the matcher's stack frame (if any). Under
    // Options::zeroStackFrame() the fresh frame is also zeroed: frames of at
    // most 128 bytes with a straight run of stores, larger frames with a
    // two-words-per-iteration loop (a paired zero-register store on ARM64).
    // regT0 is used as the zeroing cursor.
    void initCallFrame()
    {
        unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
        if (callFrameSizeInBytes) {
#if CPU(X86_64) || CPU(ARM64)
            if (Options::zeroStackFrame()) {
                // We need to start from the stack pointer, because we could have spilled callee saves
                move(stackPointerRegister, regT0);
                subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
                if (callFrameSizeInBytes <= 128) {
                    // Small frame: unrolled stores below the old stack top.
                    for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
                        storePtr(TrustedImm32(0), Address(regT0, -8 - offset));
                } else {
                    // Large frame: walk regT0 down to the new stack pointer,
                    // zeroing two words per iteration.
                    Label zeroLoop = label();
                    subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0);
#if CPU(ARM64)
                    storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
#else
                    storePtr(TrustedImm32(0), Address(regT0));
                    storePtr(TrustedImm32(0), Address(regT0, sizeof(intptr_t)));
#endif
                    branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
                }
            } else
#endif
                subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);

        }
    }
690     void removeCallFrame()
691     {
692         unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
693         if (callFrameSizeInBytes)
694             addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
695     }
696
697     void generateFailReturn()
698     {
699         move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
700         move(TrustedImm32(0), returnRegister2);
701         generateReturn();
702     }
703
    // Emits the shared exit paths for abnormal termination. Jumps collected in
    // m_abortExecution land here with a return value of -2; jumps collected in
    // m_hitMatchLimit land here with -1. Both paths then tear down the call
    // frame, zero returnRegister2, and return. Emits nothing if neither jump
    // list is populated.
    void generateJITFailReturn()
    {
        if (m_abortExecution.empty() && m_hitMatchLimit.empty())
            return;

        JumpList finishExiting;
        if (!m_abortExecution.empty()) {
            m_abortExecution.link(this);
            // Aborted execution is reported as -2.
            move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister);
            finishExiting.append(jump());
        }

        if (!m_hitMatchLimit.empty()) {
            m_hitMatchLimit.link(this);
            // Hitting the match limit is reported as -1; falls through to the exit code.
            move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister);
        }

        finishExiting.link(this);
        removeCallFrame();
        move(TrustedImm32(0), returnRegister2);
        generateReturn();
    }
726
727     // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
728     void setSubpatternStart(RegisterID reg, unsigned subpattern)
729     {
730         ASSERT(subpattern);
731         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
732         store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
733     }
734     void setSubpatternEnd(RegisterID reg, unsigned subpattern)
735     {
736         ASSERT(subpattern);
737         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
738         store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
739     }
740     void clearSubpatternStart(unsigned subpattern)
741     {
742         ASSERT(subpattern);
743         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
744         store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
745     }
746
747     void clearMatches(unsigned subpattern, unsigned lastSubpattern)
748     {
749         for (; subpattern <= lastSubpattern; subpattern++)
750             clearSubpatternStart(subpattern);
751     }
752
753     // We use one of three different strategies to track the start of the current match,
754     // while matching.
755     // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
756     //    at the end of matching. This is irrespective of compileMode, and in this case
757     //    these methods should never be called.
758     // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
759     //    vector, store the match start in the output vector.
760     // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
761     //    in this register.
762     void setMatchStart(RegisterID reg)
763     {
764         ASSERT(!m_pattern.m_body->m_hasFixedSize);
765         if (compileMode == IncludeSubpatterns)
766             store32(reg, output);
767         else
768             move(reg, output);
769     }
770     void getMatchStart(RegisterID reg)
771     {
772         ASSERT(!m_pattern.m_body->m_hasFixedSize);
773         if (compileMode == IncludeSubpatterns)
774             load32(output, reg);
775         else
776             move(output, reg);
777     }
778
    // YarrOpCode enumerates the node kinds of the linearized program the
    // pattern is compiled into; matching code is emitted by walking the ops
    // forwards and backtracking code by walking them backwards.
    enum YarrOpCode : uint8_t {
        // These nodes wrap body alternatives - those in the main disjunction,
        // rather than subpatterns or assertions. These are chained together in
        // a doubly linked list, with a 'begin' node for the first alternative,
        // a 'next' node for each subsequent alternative, and an 'end' node at
        // the end. In the case of repeating alternatives, the 'end' node also
        // has a reference back to 'begin'.
        OpBodyAlternativeBegin,
        OpBodyAlternativeNext,
        OpBodyAlternativeEnd,
        // Similar to the body alternatives, but used for subpatterns with two
        // or more alternatives.
        OpNestedAlternativeBegin,
        OpNestedAlternativeNext,
        OpNestedAlternativeEnd,
        // Used for alternatives in subpatterns where there is only a single
        // alternative (backtracking is easier in these cases), or for alternatives
        // which never need to be backtracked (those in parenthetical assertions,
        // terminal subpatterns).
        OpSimpleNestedAlternativeBegin,
        OpSimpleNestedAlternativeNext,
        OpSimpleNestedAlternativeEnd,
        // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
        OpParenthesesSubpatternOnceBegin,
        OpParenthesesSubpatternOnceEnd,
        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
        OpParenthesesSubpatternTerminalBegin,
        OpParenthesesSubpatternTerminalEnd,
        // Used to wrap generic captured matches
        OpParenthesesSubpatternBegin,
        OpParenthesesSubpatternEnd,
        // Used to wrap parenthetical assertions.
        OpParentheticalAssertionBegin,
        OpParentheticalAssertionEnd,
        // Wraps all simple terms (pattern characters, character classes).
        OpTerm,
        // Where an expression contains only 'once through' body alternatives
        // and no repeating ones, this op is used to return match failure.
        OpMatchFailed
    };
819
820     // This structure is used to hold the compiled opcode information,
821     // including reference back to the original PatternTerm/PatternAlternatives,
822     // and JIT compilation data structures.
823     struct YarrOp {
824         explicit YarrOp(PatternTerm* term)
825             : m_term(term)
826             , m_op(OpTerm)
827             , m_isDeadCode(false)
828         {
829         }
830
831         explicit YarrOp(YarrOpCode op)
832             : m_op(op)
833             , m_isDeadCode(false)
834         {
835         }
836
837         // For alternatives, this holds the PatternAlternative and doubly linked
838         // references to this alternative's siblings. In the case of the
839         // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
840         // m_nextOp will reference the OpBodyAlternativeBegin node of the first
841         // repeating alternative.
842         PatternAlternative* m_alternative;
843         size_t m_previousOp;
844         size_t m_nextOp;
845         
846         // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
847         PatternTerm* m_term;
848         YarrOpCode m_op;
849
850         // Used to record a set of Jumps out of the generated code, typically
851         // used for jumps out to backtracking code, and a single reentry back
852         // into the code for a node (likely where a backtrack will trigger
853         // rematching).
854         Label m_reentry;
855         JumpList m_jumps;
856
857         // Used for backtracking when the prior alternative did not consume any
858         // characters but matched.
859         Jump m_zeroLengthMatch;
860
861         // This flag is used to null out the second pattern character, when
862         // two are fused to match a pair together.
863         bool m_isDeadCode;
864
865         // Currently used in the case of some of the more complex management of
866         // 'm_checkedOffset', to cache the offset used in this alternative, to avoid
867         // recalculating it.
868         Checked<unsigned> m_checkAdjust;
869
870         // Used by OpNestedAlternativeNext/End to hold the pointer to the
871         // value that will be pushed into the pattern's frame to return to,
872         // upon backtracking back into the disjunction.
873         DataLabelPtr m_returnAddress;
874     };
875
876     // BacktrackingState
877     // This class encapsulates information about the state of code generation
878     // whilst generating the code for backtracking, when a term fails to match.
879     // Upon entry to code generation of the backtracking code for a given node,
880     // the Backtracking state will hold references to all control flow sources
881     // that are outputs in need of further backtracking from the prior node
882     // generated (which is the subsequent operation in the regular expression,
883     // and in the m_ops Vector, since we generated backtracking backwards).
884     // These references to control flow take the form of:
885     //  - A jump list of jumps, to be linked to code that will backtrack them
886     //    further.
887     //  - A set of DataLabelPtr values, to be populated with values to be
888     //    treated effectively as return addresses backtracking into complex
889     //    subpatterns.
890     //  - A flag indicating that the current sequence of generated code up to
891     //    this point requires backtracking.
892     class BacktrackingState {
893     public:
894         BacktrackingState()
895             : m_pendingFallthrough(false)
896         {
897         }
898
899         // Add a jump or jumps, a return address, or set the flag indicating
900         // that the current 'fallthrough' control flow requires backtracking.
901         void append(const Jump& jump)
902         {
903             m_laterFailures.append(jump);
904         }
905         void append(JumpList& jumpList)
906         {
907             m_laterFailures.append(jumpList);
908         }
909         void append(const DataLabelPtr& returnAddress)
910         {
911             m_pendingReturns.append(returnAddress);
912         }
913         void fallthrough()
914         {
915             ASSERT(!m_pendingFallthrough);
916             m_pendingFallthrough = true;
917         }
918
919         // These methods clear the backtracking state, either linking to the
920         // current location, a provided label, or copying the backtracking out
921         // to a JumpList. All actions may require code generation to take place,
922         // and as such are passed a pointer to the assembler.
923         void link(MacroAssembler* assembler)
924         {
925             if (m_pendingReturns.size()) {
926                 Label here(assembler);
927                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
928                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
929                 m_pendingReturns.clear();
930             }
931             m_laterFailures.link(assembler);
932             m_laterFailures.clear();
933             m_pendingFallthrough = false;
934         }
935         void linkTo(Label label, MacroAssembler* assembler)
936         {
937             if (m_pendingReturns.size()) {
938                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
939                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
940                 m_pendingReturns.clear();
941             }
942             if (m_pendingFallthrough)
943                 assembler->jump(label);
944             m_laterFailures.linkTo(label, assembler);
945             m_laterFailures.clear();
946             m_pendingFallthrough = false;
947         }
948         void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
949         {
950             if (m_pendingReturns.size()) {
951                 Label here(assembler);
952                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
953                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
954                 m_pendingReturns.clear();
955                 m_pendingFallthrough = true;
956             }
957             if (m_pendingFallthrough)
958                 jumpList.append(assembler->jump());
959             jumpList.append(m_laterFailures);
960             m_laterFailures.clear();
961             m_pendingFallthrough = false;
962         }
963
964         bool isEmpty()
965         {
966             return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
967         }
968
969         // Called at the end of code generation to link all return addresses.
970         void linkDataLabels(LinkBuffer& linkBuffer)
971         {
972             ASSERT(isEmpty());
973             for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
974                 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf<YarrBacktrackPtrTag>(m_backtrackRecords[i].m_backtrackLocation));
975         }
976
977     private:
978         struct ReturnAddressRecord {
979             ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
980                 : m_dataLabel(dataLabel)
981                 , m_backtrackLocation(backtrackLocation)
982             {
983             }
984
985             DataLabelPtr m_dataLabel;
986             Label m_backtrackLocation;
987         };
988
989         JumpList m_laterFailures;
990         bool m_pendingFallthrough;
991         Vector<DataLabelPtr, 4> m_pendingReturns;
992         Vector<ReturnAddressRecord, 4> m_backtrackRecords;
993     };
994
995     // Generation methods:
996     // ===================
997
998     // This method provides a default implementation of backtracking common
999     // to many terms; terms commonly jump out of the forwards  matching path
1000     // on any failed conditions, and add these jumps to the m_jumps list. If
1001     // no special handling is required we can often just backtrack to m_jumps.
1002     void backtrackTermDefault(size_t opIndex)
1003     {
1004         YarrOp& op = m_ops[opIndex];
1005         m_backtrackingState.append(op.m_jumps);
1006     }
1007
    // Emits code for a '^' (beginning-of-line) assertion. In multiline mode it
    // matches at the start of input or just after a newline; otherwise only at
    // the very start of input. Failure jumps are collected in op.m_jumps.
    void generateAssertionBOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.multiline()) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (!term->inputPosition)
                matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));

            // Read the character before the current position (+1 offsets one earlier).
            readCharacter(m_checkedOffset - term->inputPosition + 1, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            // Erk, really should poison out these alternatives early. :-/
            if (term->inputPosition)
                op.m_jumps.append(jump());
            else
                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
        }
    }
    // '^' needs no backtracking of its own; just forward the failure jumps.
    void backtrackAssertionBOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1037
    // Emits code for a '$' (end-of-line) assertion. In multiline mode it
    // matches at the end of input or when the current character is a newline;
    // otherwise only at the very end of input. Failure jumps go to op.m_jumps.
    void generateAssertionEOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.multiline()) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (term->inputPosition == m_checkedOffset.unsafeGet())
                matchDest.append(atEndOfInput());

            readCharacter(m_checkedOffset - term->inputPosition, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            if (term->inputPosition == m_checkedOffset.unsafeGet())
                op.m_jumps.append(notAtEndOfInput());
            // Erk, really should poison out these alternatives early. :-/
            else
                op.m_jumps.append(jump());
        }
    }
    // '$' needs no backtracking of its own; just forward the failure jumps.
    void backtrackAssertionEOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1067
1068     // Also falls though on nextIsNotWordChar.
1069     void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
1070     {
1071         YarrOp& op = m_ops[opIndex];
1072         PatternTerm* term = op.m_term;
1073
1074         const RegisterID character = regT0;
1075
1076         if (term->inputPosition == m_checkedOffset.unsafeGet())
1077             nextIsNotWordChar.append(atEndOfInput());
1078
1079         readCharacter(m_checkedOffset - term->inputPosition, character);
1080
1081         CharacterClass* wordcharCharacterClass;
1082
1083         if (m_unicodeIgnoreCase)
1084             wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1085         else
1086             wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1087
1088         matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass);
1089     }
1090
    // Emits code for a '\b' / '\B' word-boundary assertion; term->invert()
    // selects '\B'. A boundary exists when exactly one of (previous character,
    // next character) is a word character. The previous-character test below
    // falls through for a non-word char (or start of input) and jumps to
    // matchDest for a word char; matchAssertionWordchar() then classifies the
    // next character separately for each of the two cases.
    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        Jump atBegin;
        JumpList matchDest;
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
        // Read the character before the current position (+1 offsets one earlier).
        readCharacter(m_checkedOffset - term->inputPosition + 1, character);

        CharacterClass* wordcharCharacterClass;

        if (m_unicodeIgnoreCase)
            wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
        else
            wordcharCharacterClass = m_pattern.wordcharCharacterClass();

        matchCharacterClass(character, matchDest, wordcharCharacterClass);
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        // non-word followed by non-word: assertion fails.
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-though!
        }

        // word followed by word: assertion fails.
        op.m_jumps.append(wordCharThenWordChar);

        // The two success paths converge here.
        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
    // '\b' / '\B' needs no backtracking of its own; just forward the failure jumps.
    void backtrackAssertionWordBoundary(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1148
1149 #if ENABLE(YARR_JIT_BACKREFERENCES)
    // Emits a loop comparing the captured group (walked by patternIndex, up to
    // the group's end index stored in the output vector) against the subject
    // at the current index, one code unit at a time (surrogate pairs are not
    // decoded here). For ignore-case matching both characters are canonicalized
    // through canonicalTableLChar before comparing (generateBackReference()
    // only reaches this path with 8-bit subjects in that mode). Mismatches
    // jump to characterMatchFails; on fallthrough, index and patternIndex have
    // advanced past the matched characters.
    void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        unsigned subpatternId = term->backReferenceSubpatternId;

        Label loop(this);

        readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
        readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);

        if (!m_pattern.ignoreCase())
            characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
        else {
            // Fast path: identical code units match without canonicalization.
            Jump charactersMatch = branch32(Equal, character, patternCharacter);
            ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
            load16(characterTableEntry, character);
            ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
            load16(patternTableEntry, patternCharacter);
            characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
            charactersMatch.link(this);
        }


        add32(TrustedImm32(1), index);
        add32(TrustedImm32(1), patternIndex);

        // Loop until patternIndex reaches the group's end index.
        branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
    }
1179
    // Emits matching code for a back reference term, with separate handling
    // for the three quantifier flavors. Ignore-case back references on 16-bit
    // subject strings are not supported and abort JIT compilation by setting
    // m_failureReason.
    void generateBackReference(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.ignoreCase() && m_charSize != Char8) {
            // matchBackreference() canonicalizes through canonicalTableLChar,
            // which only covers 8-bit characters.
            m_failureReason = JITFailureReason::BackReference;
            return;
        }

        unsigned subpatternId = term->backReferenceSubpatternId;
        unsigned parenthesesFrameLocation = term->frameLocation;

        const RegisterID characterOrTemp = regT0;
        const RegisterID patternIndex = regT1;
        const RegisterID patternTemp = regT2;

        // Save the input position so backtracking can restore it.
        storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
        if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
            storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());

        JumpList matches;

        if (term->quantityType != QuantifierNonGreedy) {
            // Load the captured group's start and end indices.
            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // An empty match is successful without consuming characters
            if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
                matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
                matches.append(branch32(Equal, patternIndex, patternTemp));
            } else {
                // Single fixed match: record a match amount of 1 on the
                // zero-length path before succeeding.
                Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
                Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
                zeroLengthMatch.link(this);
                storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
                matches.append(jump());
                tryNonZeroMatch.link(this);
            }
        }

        switch (term->quantityType) {
        case QuantifierFixedCount: {
            Label outerLoop(this);

            // PatternTemp should contain pattern end index at this point
            sub32(patternIndex, patternTemp);
            op.m_jumps.append(checkNotEnoughInput(patternTemp));

            matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);

            if (term->quantityMaxCount != 1) {
                // Bump the match count and loop until quantityMaxCount
                // repetitions have been consumed.
                loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
                add32(TrustedImm32(1), characterOrTemp);
                storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
                matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
                load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
                load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
                jump(outerLoop);
            }
            matches.link(this);
            break;
        }

        case QuantifierGreedy: {
            JumpList incompleteMatches;

            Label outerLoop(this);

            // PatternTemp should contain pattern end index at this point
            sub32(patternIndex, patternTemp);
            // Running out of input simply ends the greedy loop with what we have.
            matches.append(checkNotEnoughInput(patternTemp));

            matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);

            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
            add32(TrustedImm32(1), characterOrTemp);
            storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            if (term->quantityMaxCount != quantifyInfinite)
                matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // Store current index in frame for restoring after a partial match
            storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
            jump(outerLoop);

            incompleteMatches.link(this);
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);

            matches.link(this);
            // Backtracking re-enters here to give back one repetition.
            op.m_reentry = label();
            break;
        }

        case QuantifierNonGreedy: {
            JumpList incompleteMatches;

            // Non-greedy: first try matching nothing; backtracking re-enters
            // at m_reentry to consume one more repetition at a time.
            matches.append(jump());

            op.m_reentry = label();

            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // An empty match is successful without consuming characters
            Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
            Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
            zeroLengthMatch.link(this);
            storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            matches.append(jump());
            tryNonZeroMatch.link(this);

            // Check if we have input remaining to match
            sub32(patternIndex, patternTemp);
            matches.append(checkNotEnoughInput(patternTemp));

            storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());

            matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);

            matches.append(jump());

            incompleteMatches.link(this);
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);

            matches.link(this);
            break;
        }
        }
    }
    // Emits backtracking code for a back reference. Fixed-count terms simply
    // restore the saved input position; greedy terms give back one repetition
    // and re-enter the matching code at op.m_reentry; non-greedy terms try to
    // consume one more repetition. When no option remains, control falls
    // through to the previous term's backtracking.
    void backtrackBackReference(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        unsigned subpatternId = term->backReferenceSubpatternId;

        m_backtrackingState.link(this);
        op.m_jumps.link(this);

        JumpList failures;

        unsigned parenthesesFrameLocation = term->frameLocation;
        switch (term->quantityType) {
        case QuantifierFixedCount:
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
            break;

        case QuantifierGreedy: {
            const RegisterID matchAmount = regT0;
            const RegisterID patternStartIndex = regT1;
            const RegisterID patternEndIndexOrLen = regT2;

            // Nothing left to give back once the match amount reaches zero.
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
            failures.append(branchTest32(Zero, matchAmount));

            // Rewind the input index by the length of the captured group.
            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
            sub32(patternStartIndex, patternEndIndexOrLen);
            sub32(patternEndIndexOrLen, index);

            sub32(TrustedImm32(1), matchAmount);
            storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            jump(op.m_reentry);
            break;
        }

        case QuantifierNonGreedy: {
            const RegisterID matchAmount = regT0;

            failures.append(atEndOfInput());
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
            if (term->quantityMaxCount != quantifyInfinite)
                failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
            add32(TrustedImm32(1), matchAmount);
            storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            jump(op.m_reentry);
            break;
        }
        }
        failures.link(this);
        m_backtrackingState.fallthrough();
    }
1364 #endif
1365
    // Emits matching code for a single fixed-count (quantityMaxCount == 1)
    // pattern character. As an optimization, runs of adjacent single
    // fixed-count character terms at consecutive input positions are
    // coalesced into one wider unaligned load-and-compare; the absorbed
    // terms are marked dead so they emit nothing themselves. Case-insensitive
    // ASCII letters are handled by OR-ing 0x20 (lower-casing) into both the
    // loaded input and the expected constant.
    void generatePatternCharacterOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];

        // This term may have been absorbed into an earlier coalesced compare.
        if (op.m_isDeadCode)
            return;

        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
        // node, so there must always be at least one more node.
        ASSERT(opIndex + 1 < m_ops.size());
        YarrOp* nextOp = &m_ops[opIndex + 1];

        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        if (!isLatin1(ch) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            op.m_jumps.append(jump());
            return;
        }

        const RegisterID character = regT0;
        // Number of pattern characters that fit in a single native load:
        // 64-bit loads on X86_64/ARM64, 32-bit loads elsewhere.
#if CPU(X86_64) || CPU(ARM64)
        unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
#else
        unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
#endif
        uint64_t ignoreCaseMask = 0;
#if CPU(BIG_ENDIAN)
        uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
        uint64_t allCharacters = ch;
#endif
        unsigned numberCharacters;
        unsigned startTermPosition = term->inputPosition;

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));

        if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
#if CPU(BIG_ENDIAN)
            ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
            ignoreCaseMask |= 32;
#endif
        }

        // Try to absorb the following terms into this compare, packing each
        // character into allCharacters at its byte/halfword position.
        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
            PatternTerm* nextTerm = nextOp->m_term;

            // YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
            // Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
            if (nextTerm->type != PatternTerm::TypePatternCharacter
                || nextTerm->quantityType != QuantifierFixedCount
                || nextTerm->quantityMaxCount != 1
                || nextTerm->inputPosition != (startTermPosition + numberCharacters)
                || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs))
                break;

            nextOp->m_isDeadCode = true;

#if CPU(BIG_ENDIAN)
            int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
#else
            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
#endif

            UChar32 currentCharacter = nextTerm->patternCharacter;

            if (!isLatin1(currentCharacter) && (m_charSize == Char8)) {
                // Have a 16 bit pattern character and an 8 bit string - short circuit
                op.m_jumps.append(jump());
                return;
            }

            // For case-insensitive compares, non-ascii characters that have different
            // upper & lower case representations are converted to a character class.
            ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));

            allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);

            if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
                ignoreCaseMask |= 32ULL << shiftAmount;
        }

        if (m_decodeSurrogatePairs)
            op.m_jumps.append(jumpIfNoAvailableInput());

        // The checkN helpers below compare N input bytes (Char8) or N input
        // characters (Char16) in a single load, OR-ing in ignoreCaseMask to
        // lower-case ASCII letters before the comparison.
        if (m_charSize == Char8) {
            auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
                op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
            };

            auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
                load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or32(Imm32(mask), character);
                op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
            };

            auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
                if (mask) {
                    load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
                    if (mask)
                        or32(Imm32(mask), character);
                    op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
                    return;
                }
                // No case mask needed: compare directly against memory.
                op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
            };

#if CPU(X86_64) || CPU(ARM64)
            auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
                load64(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or64(TrustedImm64(mask), character);
                op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
            };
#endif

            // Decompose the coalesced run into the widest available checks.
            switch (numberCharacters) {
            case 1:
                // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
                check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
                return;
            case 2: {
                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
                return;
            }
            case 3: {
                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
                check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
                return;
            }
            case 4: {
                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                return;
            }
#if CPU(X86_64) || CPU(ARM64)
            case 5: {
                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
                return;
            }
            case 6: {
                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
                return;
            }
            case 7: {
                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
                check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
                return;
            }
            case 8: {
                check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
                return;
            }
#endif
            }
        } else {
            // 16-bit subject string: each "check" unit is a UTF-16 code unit.
            auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
                op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
            };

            auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
                if (mask) {
                    load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
                    if (mask)
                        or32(Imm32(mask), character);
                    op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
                    return;
                }
                op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
            };

#if CPU(X86_64) || CPU(ARM64)
            auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
                load64(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or64(TrustedImm64(mask), character);
                op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
            };
#endif

            switch (numberCharacters) {
            case 1:
                // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
                check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
                return;
            case 2:
                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                return;
#if CPU(X86_64) || CPU(ARM64)
            case 3:
                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
                return;
            case 4:
                check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
                return;
#endif
            }
        }
    }
    // A once-matched pattern character keeps no backtracking state of its
    // own; delegate to the shared default term backtracker.
    void backtrackPatternCharacterOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1577
    // Emits a loop matching a pattern character a fixed number of times
    // (quantityMaxCount > 1). countRegister walks from index - scaledMaxCount
    // up to index, comparing one character per iteration; any mismatch jumps
    // to this op's failure list.
    void generatePatternCharacterFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        if (m_decodeSurrogatePairs)
            op.m_jumps.append(jumpIfNoAvailableInput());

        move(index, countRegister);
        Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
        // A non-BMP character occupies two UTF-16 code units in the subject.
        scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
        sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);

        Label loop(this);
        readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
        if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
            // Lower-case both sides of the compare (ASCII alpha: bit 0x20).
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
#ifdef JIT_UNICODE_EXPRESSIONS
        // A matched surrogate pair advances the position by two code units.
        if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
            add32(TrustedImm32(2), countRegister);
        else
#endif
            add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // A fixed-count pattern character keeps no backtracking state of its
    // own; delegate to the shared default term backtracker.
    void backtrackPatternCharacterFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1618
    // Greedily matches a pattern character as many times as possible (up to
    // quantityMaxCount, or unbounded for quantifyInfinite), counting matches
    // in countRegister. The count is stored to the frame at the re-entry
    // point so backtracking can give matches back one at a time.
    void generatePatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        if (!(!isLatin1(ch) && (m_charSize == Char8))) {
            JumpList failures;
            Label loop(this);
            failures.append(atEndOfInput());
            failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));

            add32(TrustedImm32(1), index);
#ifdef JIT_UNICODE_EXPRESSIONS
            if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
                // The trailing surrogate needs a second code unit of input;
                // if it isn't there, undo the advance and fail.
                Jump surrogatePairOk = notAtEndOfInput();
                sub32(TrustedImm32(1), index);
                failures.append(jump());
                surrogatePairOk.link(this);
                add32(TrustedImm32(1), index);
            }
#endif
            add32(TrustedImm32(1), countRegister);

            if (term->quantityMaxCount == quantifyInfinite)
                jump(loop);
            else
                branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);

            failures.link(this);
        }
        // Backtracking re-enters here with an updated countRegister.
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
    }
    // Backtracks a greedy character match by giving back one match: decrement
    // the saved match count and rewind index by one code unit (two for a
    // non-BMP character in unicode mode), then jump to the re-entry point,
    // which re-stores the updated count.
    void backtrackPatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
        // Nothing left to give back - propagate the backtrack upstream.
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter))
            sub32(TrustedImm32(1), index);
        else
            sub32(TrustedImm32(2), index);
        jump(op.m_reentry);
    }
1678
    // A non-greedy pattern character initially matches zero times; actual
    // matching happens lazily in backtrackPatternCharacterNonGreedy. Here we
    // just zero the match count and record the re-entry point that stores it.
    void generatePatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
    }
    // Backtracking a non-greedy pattern character tries to match one more
    // instance. On success the count is incremented and control jumps back to
    // the re-entry point; on failure the index is rewound by the total number
    // of code units consumed and the backtrack propagates upstream.
    void backtrackPatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        if (!(!isLatin1(ch) && (m_charSize == Char8))) {
            JumpList nonGreedyFailures;
            nonGreedyFailures.append(atEndOfInput());
            if (term->quantityMaxCount != quantifyInfinite)
                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
            nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));

            add32(TrustedImm32(1), index);
#ifdef JIT_UNICODE_EXPRESSIONS
            if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
                // The trailing surrogate needs a second code unit of input;
                // if it isn't there, undo the advance and fail.
                Jump surrogatePairOk = notAtEndOfInput();
                sub32(TrustedImm32(1), index);
                nonGreedyFailures.append(jump());
                surrogatePairOk.link(this);
                add32(TrustedImm32(1), index);
            }
#endif
            add32(TrustedImm32(1), countRegister);

            jump(op.m_reentry);
            nonGreedyFailures.link(this);
        }

        if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
            // subtract countRegister*2 for non-BMP characters
            lshift32(TrustedImm32(1), countRegister);
        }

        // Rewind past everything this term consumed before propagating.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
1735
    // Emits a match of a character class with quantity one. In unicode mode
    // the current index is saved to the frame so backtracking can restore it,
    // and after a match the index is advanced one extra code unit when the
    // character read was non-BMP (a surrogate pair).
    void generateCharacterClassOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        if (m_decodeSurrogatePairs) {
            op.m_jumps.append(jumpIfNoAvailableInput());
            storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
        }

        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition, character);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                op.m_jumps.append(matchDest);
            else {
                op.m_jumps.append(jump());
                matchDest.link(this);
            }
        }
#ifdef JIT_UNICODE_EXPRESSIONS
        // Mixed-width or inverted classes may have consumed a supplementary-
        // plane character: step past its trailing surrogate as well.
        if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert())) {
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
        }
#endif
    }
    // Undoes generateCharacterClassOnce: in unicode mode restore the index
    // saved before the match (the match may have consumed one or two code
    // units), then delegate to the shared default term backtracker.
    void backtrackCharacterClassOnce(size_t opIndex)
    {
#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            YarrOp& op = m_ops[opIndex];
            PatternTerm* term = op.m_term;

            m_backtrackingState.link(this);
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
            m_backtrackingState.fallthrough();
        }
#endif
        backtrackTermDefault(opIndex);
    }
1784
    // Emits a loop matching a character class exactly quantityMaxCount times.
    // countRegister walks from index - scaledMaxCount up to index, matching
    // one character per iteration.
    void generateCharacterClassFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        if (m_decodeSurrogatePairs)
            op.m_jumps.append(jumpIfNoAvailableInput());

        move(index, countRegister);

        Checked<unsigned> scaledMaxCount = term->quantityMaxCount;

#ifdef JIT_UNICODE_EXPRESSIONS
        // If every character the class matches is non-BMP, each match
        // consumes exactly two code units.
        if (m_decodeSurrogatePairs && term->characterClass->hasOnlyNonBMPCharacters() && !term->invert())
            scaledMaxCount *= 2;
#endif
        sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);

        Label loop(this);
        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                op.m_jumps.append(matchDest);
            else {
                op.m_jumps.append(jump());
                matchDest.link(this);
            }
        }

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            if (term->characterClass->hasOneCharacterSize() && !term->invert())
                add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), countRegister);
            else {
                // Mixed-width class: consume an extra code unit (and advance
                // the position) when the character just read was non-BMP.
                add32(TrustedImm32(1), countRegister);
                Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
                op.m_jumps.append(atEndOfInput());
                add32(TrustedImm32(1), countRegister);
                add32(TrustedImm32(1), index);
                isBMPChar.link(this);
            }
        } else
#endif
            add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // A fixed-count character class keeps no backtracking state of its own;
    // delegate to the shared default term backtracker.
    void backtrackCharacterClassFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1843
    // Greedily matches a character class as many times as possible (up to
    // quantityMaxCount, or unbounded for quantifyInfinite), counting matches
    // in countRegister. The count is stored to the frame at the re-entry
    // point so backtracking can give matches back one at a time.
    void generateCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        // Mixed-width or inverted classes need the start index saved so that
        // backtracking can recompute positions by rematching.
        if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert()))
            storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
        move(TrustedImm32(0), countRegister);

        JumpList failures;
        Label loop(this);
#ifdef JIT_UNICODE_EXPRESSIONS
        if (term->characterClass->hasOneCharacterSize() && !term->invert() && term->characterClass->hasNonBMPCharacters()) {
            // Every match consumes two code units; require both up front.
            move(TrustedImm32(1), character);
            failures.append(checkNotEnoughInput(character));
        } else
#endif
            failures.append(atEndOfInput());

        if (term->invert()) {
            readCharacter(m_checkedOffset - term->inputPosition, character);
            matchCharacterClass(character, failures, term->characterClass);
        } else {
            JumpList matchDest;
            readCharacter(m_checkedOffset - term->inputPosition, character);
            // If we are matching the "any character" builtin class for non-unicode patterns,
            // we only need to read the character and don't need to match as it will always succeed.
            if (!term->characterClass->m_anyCharacter) {
                matchCharacterClass(character, matchDest, term->characterClass);
                failures.append(jump());
            }
            matchDest.link(this);
        }

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs)
            advanceIndexAfterCharacterClassTermMatch(term, failures, character);
        else
#endif
            add32(TrustedImm32(1), index);
        add32(TrustedImm32(1), countRegister);

        if (term->quantityMaxCount != quantifyInfinite) {
            branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
            failures.append(jump());
        } else
            jump(loop);

        failures.link(this);
        // Backtracking re-enters here with an updated countRegister.
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
    }
1900     void backtrackCharacterClassGreedy(size_t opIndex)
1901     {
1902         YarrOp& op = m_ops[opIndex];
1903         PatternTerm* term = op.m_term;
1904
1905         const RegisterID countRegister = regT1;
1906
1907         m_backtrackingState.link(this);
1908
1909         loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1910         m_backtrackingState.append(branchTest32(Zero, countRegister));
1911         sub32(TrustedImm32(1), countRegister);
1912         storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1913
1914         if (!m_decodeSurrogatePairs)
1915             sub32(TrustedImm32(1), index);
1916         else if (term->characterClass->hasOneCharacterSize() && !term->invert())
1917             sub32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
1918         else {
1919             // Rematch one less
1920             const RegisterID character = regT0;
1921
1922             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1923
1924             Label rematchLoop(this);
1925             readCharacter(m_checkedOffset - term->inputPosition, character);
1926
1927             sub32(TrustedImm32(1), countRegister);
1928             add32(TrustedImm32(1), index);
1929
1930 #ifdef JIT_UNICODE_EXPRESSIONS
1931             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1932             add32(TrustedImm32(1), index);
1933             isBMPChar.link(this);
1934 #endif
1935
1936             branchTest32(Zero, countRegister).linkTo(rematchLoop, this);
1937
1938             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1939         }
1940         jump(op.m_reentry);
1941     }
1942
    // A non-greedy character class initially matches zero times; actual
    // matching happens lazily in backtrackCharacterClassNonGreedy. Zero the
    // match count, record the re-entry point, and (in unicode mode, for
    // classes whose rewind cannot be computed by subtraction) save the start
    // index to the frame.
    void generateCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            if (!term->characterClass->hasOneCharacterSize() || term->invert())
                storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
        }
#endif

        storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
    }
1962
    // Backtracking a non-greedy character class tries to match one more
    // character. On success the count is incremented and control jumps back
    // to the re-entry point; on failure the index is rewound and the
    // backtrack propagates upstream.
    void backtrackCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        JumpList nonGreedyFailures;

        m_backtrackingState.link(this);

#ifdef JIT_UNICODE_EXPRESSIONS
        // Restore the saved start index for classes whose position cannot be
        // recomputed arithmetically.
        if (m_decodeSurrogatePairs) {
            if (!term->characterClass->hasOneCharacterSize() || term->invert())
                loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
        }
#endif

        loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);

        nonGreedyFailures.append(atEndOfInput());
        nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));

        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition, character);
        // If we are matching the "any character" builtin class for non-unicode patterns,
        // we only need to read the character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                nonGreedyFailures.append(matchDest);
            else {
                nonGreedyFailures.append(jump());
                matchDest.link(this);
            }
        }

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs)
            advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailures, character);
        else
#endif
            add32(TrustedImm32(1), index);
        add32(TrustedImm32(1), countRegister);

        jump(op.m_reentry);

        nonGreedyFailures.link(this);
        // NOTE(review): this rewind assumes one code unit per matched
        // character; for mixed-width/inverted unicode classes the index was
        // already restored from beginIndex above, where that assumption does
        // not hold - confirm the remaining cases are all single-width.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
2016
    // Emits matching for a .*<body>.* enclosure. For dotAll patterns the
    // match trivially spans the entire input. Otherwise, scan backwards from
    // the current match start to the preceding newline (or the initial start
    // position) to find where the match begins, then forwards from the
    // current index to the next newline (or end of input) to find where it
    // ends. Non-multiline BOL/EOL anchors constrain the scan results.
    void generateDotStarEnclosure(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID matchPos = regT1;
#ifndef HAVE_INITIAL_START_REG
        // No dedicated register: reload the initial-start value from the
        // frame into the (shared) character register whenever needed.
        const RegisterID initialStart = character;
#endif

        JumpList foundBeginningNewLine;
        JumpList saveStartIndex;
        JumpList foundEndingNewLine;

        if (m_pattern.dotAll()) {
            // . also matches newlines: the match is the whole input.
            move(TrustedImm32(0), matchPos);
            setMatchStart(matchPos);
            move(length, index);
            return;
        }

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        getMatchStart(matchPos);

#ifndef HAVE_INITIAL_START_REG
        loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
#endif
        // Scan backwards for the newline preceding the match start.
        saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
        Label findBOLLoop(this);
        sub32(TrustedImm32(1), matchPos);
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());

#ifndef HAVE_INITIAL_START_REG
        // matchCharacterClass clobbered the shared register; reload.
        loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
#endif
        branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
        saveStartIndex.append(jump());

        foundBeginningNewLine.link(this);
        add32(TrustedImm32(1), matchPos); // Advance past newline
        saveStartIndex.link(this);

        // A ^ anchor outside multiline mode only matches at position 0.
        if (!m_pattern.multiline() && term->anchors.bolAnchor)
            op.m_jumps.append(branchTest32(NonZero, matchPos));

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        setMatchStart(matchPos);

        move(index, matchPos);

        // Scan forwards for the newline (or end of input) ending the match.
        Label findEOLLoop(this);
        foundEndingNewLine.append(branch32(Equal, matchPos, length));
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
        add32(TrustedImm32(1), matchPos);
        jump(findEOLLoop);

        foundEndingNewLine.link(this);

        // A $ anchor outside multiline mode only matches at end of input.
        if (!m_pattern.multiline() && term->anchors.eolAnchor)
            op.m_jumps.append(branch32(NotEqual, matchPos, length));

        move(matchPos, index);
    }
2089
    // A dot-star enclosure keeps no backtracking state of its own; delegate
    // to the shared default term backtracker.
    void backtrackDotStarEnclosure(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
2094     
2095     // Code generation/backtracking for simple terms
2096     // (pattern characters, character classes, and assertions).
2097     // These methods farm out work to the set of functions above.
2098     void generateTerm(size_t opIndex)
2099     {
2100         YarrOp& op = m_ops[opIndex];
2101         PatternTerm* term = op.m_term;
2102
2103         switch (term->type) {
2104         case PatternTerm::TypePatternCharacter:
2105             switch (term->quantityType) {
2106             case QuantifierFixedCount:
2107                 if (term->quantityMaxCount == 1)
2108                     generatePatternCharacterOnce(opIndex);
2109                 else
2110                     generatePatternCharacterFixed(opIndex);
2111                 break;
2112             case QuantifierGreedy:
2113                 generatePatternCharacterGreedy(opIndex);
2114                 break;
2115             case QuantifierNonGreedy:
2116                 generatePatternCharacterNonGreedy(opIndex);
2117                 break;
2118             }
2119             break;
2120
2121         case PatternTerm::TypeCharacterClass:
2122             switch (term->quantityType) {
2123             case QuantifierFixedCount:
2124                 if (term->quantityMaxCount == 1)
2125                     generateCharacterClassOnce(opIndex);
2126                 else
2127                     generateCharacterClassFixed(opIndex);
2128                 break;
2129             case QuantifierGreedy:
2130                 generateCharacterClassGreedy(opIndex);
2131                 break;
2132             case QuantifierNonGreedy:
2133                 generateCharacterClassNonGreedy(opIndex);
2134                 break;
2135             }
2136             break;
2137
2138         case PatternTerm::TypeAssertionBOL:
2139             generateAssertionBOL(opIndex);
2140             break;
2141
2142         case PatternTerm::TypeAssertionEOL:
2143             generateAssertionEOL(opIndex);
2144             break;
2145
2146         case PatternTerm::TypeAssertionWordBoundary:
2147             generateAssertionWordBoundary(opIndex);
2148             break;
2149
2150         case PatternTerm::TypeForwardReference:
2151             m_failureReason = JITFailureReason::ForwardReference;
2152             break;
2153
2154         case PatternTerm::TypeParenthesesSubpattern:
2155         case PatternTerm::TypeParentheticalAssertion:
2156             RELEASE_ASSERT_NOT_REACHED();
2157
2158         case PatternTerm::TypeBackReference:
2159 #if ENABLE(YARR_JIT_BACKREFERENCES)
2160             generateBackReference(opIndex);
2161 #else
2162             m_failureReason = JITFailureReason::BackReference;
2163 #endif
2164             break;
2165         case PatternTerm::TypeDotStarEnclosure:
2166             generateDotStarEnclosure(opIndex);
2167             break;
2168         }
2169     }
2170     void backtrackTerm(size_t opIndex)
2171     {
2172         YarrOp& op = m_ops[opIndex];
2173         PatternTerm* term = op.m_term;
2174
2175         switch (term->type) {
2176         case PatternTerm::TypePatternCharacter:
2177             switch (term->quantityType) {
2178             case QuantifierFixedCount:
2179                 if (term->quantityMaxCount == 1)
2180                     backtrackPatternCharacterOnce(opIndex);
2181                 else
2182                     backtrackPatternCharacterFixed(opIndex);
2183                 break;
2184             case QuantifierGreedy:
2185                 backtrackPatternCharacterGreedy(opIndex);
2186                 break;
2187             case QuantifierNonGreedy:
2188                 backtrackPatternCharacterNonGreedy(opIndex);
2189                 break;
2190             }
2191             break;
2192
2193         case PatternTerm::TypeCharacterClass:
2194             switch (term->quantityType) {
2195             case QuantifierFixedCount:
2196                 if (term->quantityMaxCount == 1)
2197                     backtrackCharacterClassOnce(opIndex);
2198                 else
2199                     backtrackCharacterClassFixed(opIndex);
2200                 break;
2201             case QuantifierGreedy:
2202                 backtrackCharacterClassGreedy(opIndex);
2203                 break;
2204             case QuantifierNonGreedy:
2205                 backtrackCharacterClassNonGreedy(opIndex);
2206                 break;
2207             }
2208             break;
2209
2210         case PatternTerm::TypeAssertionBOL:
2211             backtrackAssertionBOL(opIndex);
2212             break;
2213
2214         case PatternTerm::TypeAssertionEOL:
2215             backtrackAssertionEOL(opIndex);
2216             break;
2217
2218         case PatternTerm::TypeAssertionWordBoundary:
2219             backtrackAssertionWordBoundary(opIndex);
2220             break;
2221
2222         case PatternTerm::TypeForwardReference:
2223             m_failureReason = JITFailureReason::ForwardReference;
2224             break;
2225
2226         case PatternTerm::TypeParenthesesSubpattern:
2227         case PatternTerm::TypeParentheticalAssertion:
2228             RELEASE_ASSERT_NOT_REACHED();
2229
2230         case PatternTerm::TypeBackReference:
2231 #if ENABLE(YARR_JIT_BACKREFERENCES)
2232             backtrackBackReference(opIndex);
2233 #else
2234             m_failureReason = JITFailureReason::BackReference;
2235 #endif
2236             break;
2237
2238         case PatternTerm::TypeDotStarEnclosure:
2239             backtrackDotStarEnclosure(opIndex);
2240             break;
2241         }
2242     }
2243
2244     void generate()
2245     {
2246         // Forwards generate the matching code.
2247         ASSERT(m_ops.size());
2248         size_t opIndex = 0;
2249
2250         do {
2251             if (m_disassembler)
2252                 m_disassembler->setForGenerate(opIndex, label());
2253
2254             YarrOp& op = m_ops[opIndex];
2255             switch (op.m_op) {
2256
2257             case OpTerm:
2258                 generateTerm(opIndex);
2259                 break;
2260
2261             // OpBodyAlternativeBegin/Next/End
2262             //
2263             // These nodes wrap the set of alternatives in the body of the regular expression.
2264             // There may be either one or two chains of OpBodyAlternative nodes, one representing
2265             // the 'once through' sequence of alternatives (if any exist), and one representing
2266             // the repeating alternatives (again, if any exist).
2267             //
2268             // Upon normal entry to the Begin alternative, we will check that input is available.
2269             // Reentry to the Begin alternative will take place after the check has taken place,
2270             // and will assume that the input position has already been progressed as appropriate.
2271             //
2272             // Entry to subsequent Next/End alternatives occurs when the prior alternative has
2273             // successfully completed a match - return a success state from JIT code.
2274             //
2275             // Next alternatives allow for reentry optimized to suit backtracking from its
2276             // preceding alternative. It expects the input position to still be set to a position
2277             // appropriate to its predecessor, and it will only perform an input check if the
2278             // predecessor had a minimum size less than its own.
2279             //
2280             // In the case 'once through' expressions, the End node will also have a reentry
2281             // point to jump to when the last alternative fails. Again, this expects the input
2282             // position to still reflect that expected by the prior alternative.
2283             case OpBodyAlternativeBegin: {
2284                 PatternAlternative* alternative = op.m_alternative;
2285
2286                 // Upon entry at the head of the set of alternatives, check if input is available
2287                 // to run the first alternative. (This progresses the input position).
2288                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
2289                 // We will reenter after the check, and assume the input position to have been
2290                 // set as appropriate to this alternative.
2291                 op.m_reentry = label();
2292
2293                 m_checkedOffset += alternative->m_minimumSize;
2294                 break;
2295             }
2296             case OpBodyAlternativeNext:
2297             case OpBodyAlternativeEnd: {
2298                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2299                 PatternAlternative* alternative = op.m_alternative;
2300
2301                 // If we get here, the prior alternative matched - return success.
2302                 
2303                 // Adjust the stack pointer to remove the pattern's frame.
2304                 removeCallFrame();
2305
2306                 // Load appropriate values into the return register and the first output
2307                 // slot, and return. In the case of pattern with a fixed size, we will
2308                 // not have yet set the value in the first 
2309                 ASSERT(index != returnRegister);
2310                 if (m_pattern.m_body->m_hasFixedSize) {
2311                     move(index, returnRegister);
2312                     if (priorAlternative->m_minimumSize)
2313                         sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
2314                     if (compileMode == IncludeSubpatterns)
2315                         store32(returnRegister, output);
2316                 } else
2317                     getMatchStart(returnRegister);
2318                 if (compileMode == IncludeSubpatterns)
2319                     store32(index, Address(output, 4));
2320                 move(index, returnRegister2);
2321
2322                 generateReturn();
2323
2324                 // This is the divide between the tail of the prior alternative, above, and
2325                 // the head of the subsequent alternative, below.
2326
2327                 if (op.m_op == OpBodyAlternativeNext) {
2328                     // This is the reentry point for the Next alternative. We expect any code
2329                     // that jumps here to do so with the input position matching that of the
2330                     // PRIOR alteranative, and we will only check input availability if we
2331                     // need to progress it forwards.
2332                     op.m_reentry = label();
2333                     if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
2334                         add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
2335                         op.m_jumps.append(jumpIfNoAvailableInput());
2336                     } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
2337                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
2338                 } else if (op.m_nextOp == notFound) {
2339                     // This is the reentry point for the End of 'once through' alternatives,
2340                     // jumped to when the last alternative fails to match.
2341                     op.m_reentry = label();
2342                     sub32(Imm32(priorAlternative->m_minimumSize), index);
2343                 }
2344
2345                 if (op.m_op == OpBodyAlternativeNext)
2346                     m_checkedOffset += alternative->m_minimumSize;
2347                 m_checkedOffset -= priorAlternative->m_minimumSize;
2348                 break;
2349             }
2350
2351             // OpSimpleNestedAlternativeBegin/Next/End
2352             // OpNestedAlternativeBegin/Next/End
2353             //
2354             // These nodes are used to handle sets of alternatives that are nested within
2355             // subpatterns and parenthetical assertions. The 'simple' forms are used where
2356             // we do not need to be able to backtrack back into any alternative other than
2357             // the last, the normal forms allow backtracking into any alternative.
2358             //
2359             // Each Begin/Next node is responsible for planting an input check to ensure
2360             // sufficient input is available on entry. Next nodes additionally need to
2361             // jump to the end - Next nodes use the End node's m_jumps list to hold this
2362             // set of jumps.
2363             //
2364             // In the non-simple forms, successful alternative matches must store a
2365             // 'return address' using a DataLabelPtr, used to store the address to jump
2366             // to when backtracking, to get to the code for the appropriate alternative.
2367             case OpSimpleNestedAlternativeBegin:
2368             case OpNestedAlternativeBegin: {
2369                 PatternTerm* term = op.m_term;
2370                 PatternAlternative* alternative = op.m_alternative;
2371                 PatternDisjunction* disjunction = term->parentheses.disjunction;
2372
2373                 // Calculate how much input we need to check for, and if non-zero check.
2374                 op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
2375                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2376                     op.m_checkAdjust -= disjunction->m_minimumSize;
2377                 if (op.m_checkAdjust)
2378                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2379
2380                 m_checkedOffset += op.m_checkAdjust;
2381                 break;
2382             }
2383             case OpSimpleNestedAlternativeNext:
2384             case OpNestedAlternativeNext: {
2385                 PatternTerm* term = op.m_term;
2386                 PatternAlternative* alternative = op.m_alternative;
2387                 PatternDisjunction* disjunction = term->parentheses.disjunction;
2388
2389                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2390                 if (op.m_op == OpNestedAlternativeNext) {
2391                     unsigned parenthesesFrameLocation = term->frameLocation;
2392                     op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2393                 }
2394
2395                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2396                     // If the previous alternative matched without consuming characters then
2397                     // backtrack to try to match while consumming some input.
2398                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2399                 }
2400
2401                 // If we reach here then the last alternative has matched - jump to the
2402                 // End node, to skip over any further alternatives.
2403                 //
2404                 // FIXME: this is logically O(N^2) (though N can be expected to be very
2405                 // small). We could avoid this either by adding an extra jump to the JIT
2406                 // data structures, or by making backtracking code that jumps to Next
2407                 // alternatives are responsible for checking that input is available (if
2408                 // we didn't need to plant the input checks, then m_jumps would be free).
2409                 YarrOp* endOp = &m_ops[op.m_nextOp];
2410                 while (endOp->m_nextOp != notFound) {
2411                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2412                     endOp = &m_ops[endOp->m_nextOp];
2413                 }
2414                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2415                 endOp->m_jumps.append(jump());
2416
2417                 // This is the entry point for the next alternative.
2418                 op.m_reentry = label();
2419
2420                 // Calculate how much input we need to check for, and if non-zero check.
2421                 op.m_checkAdjust = alternative->m_minimumSize;
2422                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2423                     op.m_checkAdjust -= disjunction->m_minimumSize;
2424                 if (op.m_checkAdjust)
2425                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2426
2427                 YarrOp& lastOp = m_ops[op.m_previousOp];
2428                 m_checkedOffset -= lastOp.m_checkAdjust;
2429                 m_checkedOffset += op.m_checkAdjust;
2430                 break;
2431             }
2432             case OpSimpleNestedAlternativeEnd:
2433             case OpNestedAlternativeEnd: {
2434                 PatternTerm* term = op.m_term;
2435
2436                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2437                 if (op.m_op == OpNestedAlternativeEnd) {
2438                     unsigned parenthesesFrameLocation = term->frameLocation;
2439                     op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2440                 }
2441
2442                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2443                     // If the previous alternative matched without consuming characters then
2444                     // backtrack to try to match while consumming some input.
2445                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2446                 }
2447
2448                 // If this set of alternatives contains more than one alternative,
2449                 // then the Next nodes will have planted jumps to the End, and added
2450                 // them to this node's m_jumps list.
2451                 op.m_jumps.link(this);
2452                 op.m_jumps.clear();
2453
2454                 YarrOp& lastOp = m_ops[op.m_previousOp];
2455                 m_checkedOffset -= lastOp.m_checkAdjust;
2456                 break;
2457             }
2458
2459             // OpParenthesesSubpatternOnceBegin/End
2460             //
2461             // These nodes support (optionally) capturing subpatterns, that have a
2462             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 
2463             case OpParenthesesSubpatternOnceBegin: {
2464                 PatternTerm* term = op.m_term;
2465                 unsigned parenthesesFrameLocation = term->frameLocation;
2466                 const RegisterID indexTemporary = regT0;
2467                 ASSERT(term->quantityMaxCount == 1);
2468
2469                 // Upon entry to a Greedy quantified set of parenthese store the index.
2470                 // We'll use this for two purposes:
2471                 //  - To indicate which iteration we are on of mathing the remainder of
2472                 //    the expression after the parentheses - the first, including the
2473                 //    match within the parentheses, or the second having skipped over them.
2474                 //  - To check for empty matches, which must be rejected.
2475                 //
2476                 // At the head of a NonGreedy set of parentheses we'll immediately set the
2477                 // value on the stack to -1 (indicating a match skipping the subpattern),
2478                 // and plant a jump to the end. We'll also plant a label to backtrack to
2479                 // to reenter the subpattern later, with a store to set up index on the
2480                 // second iteration.
2481                 //
2482                 // FIXME: for capturing parens, could use the index in the capture array?
2483                 if (term->quantityType == QuantifierGreedy)
2484                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2485                 else if (term->quantityType == QuantifierNonGreedy) {
2486                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2487                     op.m_jumps.append(jump());
2488                     op.m_reentry = label();
2489                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2490                 }
2491
2492                 // If the parenthese are capturing, store the starting index value to the
2493                 // captures array, offsetting as necessary.
2494                 //
2495                 // FIXME: could avoid offsetting this value in JIT code, apply
2496                 // offsets only afterwards, at the point the results array is
2497                 // being accessed.
2498                 if (term->capture() && compileMode == IncludeSubpatterns) {
2499                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2500                     if (term->quantityType == QuantifierFixedCount)
2501                         inputOffset += term->parentheses.disjunction->m_minimumSize;
2502                     if (inputOffset) {
2503                         move(index, indexTemporary);
2504                         sub32(Imm32(inputOffset), indexTemporary);
2505                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2506                     } else
2507                         setSubpatternStart(index, term->parentheses.subpatternId);
2508                 }
2509                 break;
2510             }
2511             case OpParenthesesSubpatternOnceEnd: {
2512                 PatternTerm* term = op.m_term;
2513                 const RegisterID indexTemporary = regT0;
2514                 ASSERT(term->quantityMaxCount == 1);
2515
2516                 // If the nested alternative matched without consuming any characters, punt this back to the interpreter.
2517                 // FIXME: <https://bugs.webkit.org/show_bug.cgi?id=200786> Add ability for the YARR JIT to properly
2518                 // handle nested expressions that can match without consuming characters
2519                 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
2520                     m_abortExecution.append(branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))));
2521
2522                 // If the parenthese are capturing, store the ending index value to the
2523                 // captures array, offsetting as necessary.
2524                 //
2525                 // FIXME: could avoid offsetting this value in JIT code, apply
2526                 // offsets only afterwards, at the point the results array is
2527                 // being accessed.
2528                 if (term->capture() && compileMode == IncludeSubpatterns) {
2529                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2530                     if (inputOffset) {
2531                         move(index, indexTemporary);
2532                         sub32(Imm32(inputOffset), indexTemporary);
2533                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2534                     } else
2535                         setSubpatternEnd(index, term->parentheses.subpatternId);
2536                 }
2537
2538                 // If the parentheses are quantified Greedy then add a label to jump back
2539                 // to if we get a failed match from after the parentheses. For NonGreedy
2540                 // parentheses, link the jump from before the subpattern to here.
2541                 if (term->quantityType == QuantifierGreedy)
2542                     op.m_reentry = label();
2543                 else if (term->quantityType == QuantifierNonGreedy) {
2544                     YarrOp& beginOp = m_ops[op.m_previousOp];
2545                     beginOp.m_jumps.link(this);
2546                 }
2547                 break;
2548             }
2549
2550             // OpParenthesesSubpatternTerminalBegin/End
2551             case OpParenthesesSubpatternTerminalBegin: {
2552                 PatternTerm* term = op.m_term;
2553                 ASSERT(term->quantityType == QuantifierGreedy);
2554                 ASSERT(term->quantityMaxCount == quantifyInfinite);
2555                 ASSERT(!term->capture());
2556
2557                 // Upon entry set a label to loop back to.
2558                 op.m_reentry = label();
2559
2560                 // Store the start index of the current match; we need to reject zero
2561                 // length matches.
2562                 storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2563                 break;
2564             }
2565             case OpParenthesesSubpatternTerminalEnd: {
2566                 YarrOp& beginOp = m_ops[op.m_previousOp];
2567                 PatternTerm* term = op.m_term;
2568
2569                 // If the nested alternative matched without consuming any characters, punt this back to the interpreter.
2570                 // FIXME: <https://bugs.webkit.org/show_bug.cgi?id=200786> Add ability for the YARR JIT to properly
2571                 // handle nested expressions that can match without consuming characters
2572                 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
2573                     m_abortExecution.append(branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))));
2574
2575                 // We know that the match is non-zero, we can accept it and
2576                 // loop back up to the head of the subpattern.
2577                 jump(beginOp.m_reentry);
2578
2579                 // This is the entry point to jump to when we stop matching - we will
2580                 // do so once the subpattern cannot match any more.
2581                 op.m_reentry = label();
2582                 break;
2583             }
2584
2585             // OpParenthesesSubpatternBegin/End
2586             //
2587             // These nodes support generic subpatterns.
2588             case OpParenthesesSubpatternBegin: {
2589 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2590                 PatternTerm* term = op.m_term;
2591                 unsigned parenthesesFrameLocation = term->frameLocation;
2592
2593                 // Upon entry to a Greedy quantified set of parenthese store the index.
2594                 // We'll use this for two purposes:
2595                 //  - To indicate which iteration we are on of mathing the remainder of
2596                 //    the expression after the parentheses - the first, including the
2597                 //    match within the parentheses, or the second having skipped over them.
2598                 //  - To check for empty matches, which must be rejected.
2599                 //
2600                 // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
2601                 // in the backtrack info to -1 (indicating a match skipping the subpattern),
2602                 // and plant a jump to the end. We'll also plant a label to backtrack to
2603                 // to reenter the subpattern later, with a store to set 'begin' to current index
2604                 // on the second iteration.
2605                 //
2606                 // FIXME: for capturing parens, could use the index in the capture array?
2607                 if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
2608                     storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2609                     storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2610
2611                     if (term->quantityType == QuantifierNonGreedy) {
2612                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2613                         op.m_jumps.append(jump());
2614                     }
2615                     
2616                     op.m_reentry = label();
2617                     RegisterID currParenContextReg = regT0;
2618                     RegisterID newParenContextReg = regT1;
2619
2620                     loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
2621                     allocateParenContext(newParenContextReg);
2622                     storePtr(currParenContextReg, newParenContextReg);
2623                     storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2624                     saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
2625                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2626                 }
2627
2628                 // If the parenthese are capturing, store the starting index value to the
2629                 // captures array, offsetting as necessary.
2630                 //
2631                 // FIXME: could avoid offsetting this value in JIT code, apply
2632                 // offsets only afterwards, at the point the results array is
2633                 // being accessed.
2634                 if (term->capture() && compileMode == IncludeSubpatterns) {
2635                     const RegisterID indexTemporary = regT0;
2636                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2637                     if (term->quantityType == QuantifierFixedCount)
2638                         inputOffset += term->parentheses.disjunction->m_minimumSize;
2639                     if (inputOffset) {
2640                         move(index, indexTemporary);
2641                         sub32(Imm32(inputOffset), indexTemporary);
2642                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2643                     } else
2644                         setSubpatternStart(index, term->parentheses.subpatternId);
2645                 }
2646 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2647                 RELEASE_ASSERT_NOT_REACHED();
2648 #endif
2649                 break;
2650             }
2651             case OpParenthesesSubpatternEnd: {
2652 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2653                 PatternTerm* term = op.m_term;
2654                 unsigned parenthesesFrameLocation = term->frameLocation;
2655
2656                 // If the nested alternative matched without consuming any characters, punt this back to the interpreter.
2657                 // FIXME: <https://bugs.webkit.org/show_bug.cgi?id=200786> Add ability for the YARR JIT to properly
2658                 // handle nested expressions that can match without consuming characters
2659                 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
2660                     m_abortExecution.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
2661
2662                 const RegisterID countTemporary = regT1;
2663
2664                 YarrOp& beginOp = m_ops[op.m_previousOp];
2665                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2666                 add32(TrustedImm32(1), countTemporary);
2667                 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2668
2669                 // If the parenthese are capturing, store the ending index value to the
2670                 // captures array, offsetting as necessary.
2671                 //
2672                 // FIXME: could avoid offsetting this value in JIT code, apply
2673                 // offsets only afterwards, at the point the results array is
2674                 // being accessed.
2675                 if (term->capture() && compileMode == IncludeSubpatterns) {
2676                     const RegisterID indexTemporary = regT0;
2677                     
2678                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2679                     if (inputOffset) {
2680                         move(index, indexTemporary);
2681                         sub32(Imm32(inputOffset), indexTemporary);
2682                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2683                     } else
2684                         setSubpatternEnd(index, term->parentheses.subpatternId);
2685                 }
2686
2687                 // If the parentheses are quantified Greedy then add a label to jump back
2688                 // to if we get a failed match from after the parentheses. For NonGreedy
2689                 // parentheses, link the jump from before the subpattern to here.
2690                 if (term->quantityType == QuantifierGreedy) {
2691                     if (term->quantityMaxCount != quantifyInfinite)
2692                         branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2693                     else
2694                         jump(beginOp.m_reentry);
2695                     
2696                     op.m_reentry = label();
2697                 } else if (term->quantityType == QuantifierNonGreedy) {
2698                     YarrOp& beginOp = m_ops[op.m_previousOp];
2699                     beginOp.m_jumps.link(this);
2700                     op.m_reentry = label();
2701                 }
2702 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2703                 RELEASE_ASSERT_NOT_REACHED();
2704 #endif
2705                 break;
2706             }
2707
2708             // OpParentheticalAssertionBegin/End
2709             case OpParentheticalAssertionBegin: {
2710                 PatternTerm* term = op.m_term;
2711
2712                 // Store the current index - assertions should not update index, so
2713                 // we will need to restore it upon a successful match.
2714                 unsigned parenthesesFrameLocation = term->frameLocation;
2715                 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2716
2717                 // Adjust the index back by however much the input position has already been checked past this term.
2718                 op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2719                 if (op.m_checkAdjust)
2720                     sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2721
2722                 m_checkedOffset -= op.m_checkAdjust;
2723                 break;
2724             }
2725             case OpParentheticalAssertionEnd: {
2726                 PatternTerm* term = op.m_term;
2727
2728                 // Restore the input index value.
2729                 unsigned parenthesesFrameLocation = term->frameLocation;
2730                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index);
2731
2732                 // If inverted, a successful match of the assertion must be treated
2733                 // as a failure, so jump to backtracking.
2734                 if (term->invert()) {
2735                     op.m_jumps.append(jump());
2736                     op.m_reentry = label();
2737                 }
2738
2739                 YarrOp& lastOp = m_ops[op.m_previousOp];
2740                 m_checkedOffset += lastOp.m_checkAdjust;
2741                 break;
2742             }
2743
2744             case OpMatchFailed:
2745                 removeCallFrame();
2746                 generateFailReturn();
2747                 break;
2748             }
2749
2750             ++opIndex;
2751         } while (opIndex < m_ops.size());
2752     }
2753
2754     void backtrack()
2755     {
2756         // Backwards generate the backtracking code.
2757         size_t opIndex = m_ops.size();
2758         ASSERT(opIndex);
2759
2760         do {
2761             --opIndex;
2762
2763             if (m_disassembler)
2764                 m_disassembler->setForBacktrack(opIndex, label());
2765
2766             YarrOp& op = m_ops[opIndex];
2767             switch (op.m_op) {
2768
2769             case OpTerm:
2770                 backtrackTerm(opIndex);
2771                 break;
2772
2773             // OpBodyAlternativeBegin/Next/End
2774             //
2775             // For each Begin/Next node representing an alternative, we need to decide what to do
2776             // in two circumstances:
2777             //  - If we backtrack back into this node, from within the alternative.
2778             //  - If the input check at the head of the alternative fails (if this exists).
2779             //
2780             // We treat these two cases differently since in the former case we have slightly
2781             // more information - since we are backtracking out of a prior alternative we know
2782             // that at least enough input was available to run it. For example, given the regular
2783             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
2784             // character match of 'a'), then we need not perform an additional input availability
2785             // check before running the second alternative.
2786             //
2787             // Backtracking required differs for the last alternative, which in the case of the
2788             // repeating set of alternatives must loop. The code generated for the last alternative
2789             // will also be used to handle all input check failures from any prior alternatives -
2790             // these require similar functionality, in seeking the next available alternative for
2791             // which there is sufficient input.
2792             //
2793             // Since backtracking of all other alternatives simply requires us to link backtracks
2794             // to the reentry point for the subsequent alternative, we will only be generating any
2795             // code when backtracking the last alternative.
2796             case OpBodyAlternativeBegin:
2797             case OpBodyAlternativeNext: {
2798                 PatternAlternative* alternative = op.m_alternative;
2799
2800                 if (op.m_op == OpBodyAlternativeNext) {
2801                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2802                     m_checkedOffset += priorAlternative->m_minimumSize;
2803                 }
2804                 m_checkedOffset -= alternative->m_minimumSize;
2805
2806                 // Is this the last alternative? If not, then if we backtrack to this point we just
2807                 // need to jump to try to match the next alternative.
2808                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2809                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2810                     break;
2811                 }
2812                 YarrOp& endOp = m_ops[op.m_nextOp];
2813
2814                 YarrOp* beginOp = &op;
2815                 while (beginOp->m_op != OpBodyAlternativeBegin) {
2816                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2817                     beginOp = &m_ops[beginOp->m_previousOp];
2818                 }
2819
2820                 bool onceThrough = endOp.m_nextOp == notFound;
2821                 
2822                 JumpList lastStickyAlternativeFailures;
2823
2824                 // First, generate code to handle cases where we backtrack out of an attempted match
2825                 // of the last alternative. If this is a 'once through' set of alternatives then we
2826                 // have nothing to do - link this straight through to the End.
2827                 if (onceThrough)
2828                     m_backtrackingState.linkTo(endOp.m_reentry, this);
2829                 else {
2830                     // If we don't need to move the input position, and the pattern has a fixed size
2831                     // (in which case we omit the store of the start index until the pattern has matched)
2832                     // then we can just link the backtrack out of the last alternative straight to the
2833                     // head of the first alternative.
2834                     if (m_pattern.m_body->m_hasFixedSize
2835                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2836                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
2837                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
2838                     else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2839                         // It is a sticky pattern and the last alternative failed, jump to the end.
2840                         m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2841                     } else {
2842                         // We need to generate a trampoline of code to execute before looping back
2843                         // around to the first alternative.
2844                         m_backtrackingState.link(this);
2845
2846                         // No need to advance and retry for a sticky pattern.
2847                         if (!m_pattern.sticky()) {
2848                             // If the pattern size is not fixed, then store the start index for use if we match.
2849                             if (!m_pattern.m_body->m_hasFixedSize) {
2850                                 if (alternative->m_minimumSize == 1)
2851                                     setMatchStart(index);
2852                                 else {
2853                                     move(index, regT0);
2854                                     if (alternative->m_minimumSize)
2855                                         sub32(Imm32(alternative->m_minimumSize - 1), regT0);
2856                                     else
2857                                         add32(TrustedImm32(1), regT0);
2858                                     setMatchStart(regT0);
2859                                 }
2860                             }
2861
2862                             // Generate code to loop. Check whether the last alternative is longer than the
2863                             // first (e.g. /a|xy/ or /a|xyz/).
2864                             if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2865                                 // We want to loop, and increment input position. If the delta is 1, it is
2866                                 // already correctly incremented, if more than one then decrement as appropriate.
2867                                 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2868                                 ASSERT(delta);
2869                                 if (delta != 1)
2870                                     sub32(Imm32(delta - 1), index);
2871                                 jump(beginOp->m_reentry);
2872                             } else {
2873                                 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
2874                         // be sufficient input available to handle this, so just fall through.
2875                                 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2876                                 if (delta != 0xFFFFFFFFu) {
2877                                     // We need to check input because we are incrementing the input.
2878                                     add32(Imm32(delta + 1), index);
2879                                     checkInput().linkTo(beginOp->m_reentry, this);
2880                                 }
2881                             }
2882                         }
2883                     }
2884                 }
2885
2886                 // We can reach this point in the code in two ways:
2887                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
2888                 //    last alternative, and did not have sufficient input to run the first).
2889                 //  - We will loop back up to the following label when a repeating alternative loops,
2890                 //    following a failed input check.
2891                 //
2892                 // Either way, we have just failed the input check for the first alternative.
2893                 Label firstInputCheckFailed(this);
2894
2895                 // Generate code to handle input check failures from alternatives except the last.
2896                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
2897                 // nextOp is the alternative we will be attempting to reenter into.
2898                 // 
2899                 // We will link input check failures from the forwards matching path back to the code
2900                 // that can handle them.
2901                 YarrOp* prevOp = beginOp;
2902                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
2903                 while (nextOp->m_op != OpBodyAlternativeEnd) {
2904                     prevOp->m_jumps.link(this);
2905
2906                     // We only get here if an input check fails, it is only worth checking again
2907                     // if the next alternative has a minimum size less than the last.
2908                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
2909                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
2910                         // subtract delta back out, and reduce this code. Should performance test
2911                         // the benefit of this.
2912                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
2913                         sub32(Imm32(delta), index);
2914                         Jump fail = jumpIfNoAvailableInput();
2915                         add32(Imm32(delta), index);
2916                         jump(nextOp->m_reentry);
2917                         fail.link(this);
2918                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
2919                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
2920                     prevOp = nextOp;
2921                     nextOp = &m_ops[nextOp->m_nextOp];
2922                 }
2923
2924                 // We fall through to here if there is insufficient input to run the last alternative.
2925
2926                 // If there is insufficient input to run the last alternative, then for 'once through'
2927                 // alternatives we are done - just jump back up into the forwards matching path at the End.
2928                 if (onceThrough) {
2929                     op.m_jumps.linkTo(endOp.m_reentry, this);
2930                     jump(endOp.m_reentry);
2931                     break;
2932                 }
2933
2934                 // For repeating alternatives, link any input check failure from the last alternative to
2935                 // this point.
2936                 op.m_jumps.link(this);
2937
2938                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
2939
2940                 // Check for cases where input position is already incremented by 1 for the last
2941                 // alternative (this is particularly useful where the minimum size of the body
2942                 // disjunction is 0, e.g. /a*|b/).
2943                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
2944                     // index is already incremented by 1, so just store it now!
2945                     setMatchStart(index);
2946                     needsToUpdateMatchStart = false;
2947                 }
2948
2949                 if (!m_pattern.sticky()) {
2950                     // Check whether there is sufficient input to loop. Increment the input position by
2951                     // one, and check. Also add in the minimum disjunction size before checking - there
2952                     // is no point in looping if we're just going to fail all the input checks around
2953                     // the next iteration.
2954                     ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
2955                     if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
2956                         // If the last alternative had the same minimum size as the disjunction,
2957                         // just simply increment input pos by 1, no adjustment based on minimum size.
2958                         add32(TrustedImm32(1), index);
2959                     } else {
2960                         // If the minimum for the last alternative was one greater than that
2961                         // for the disjunction, we're already progressed by 1, nothing to do!
2962                         unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
2963                         if (delta)
2964                             sub32(Imm32(delta), index);
2965                     }
2966                     Jump matchFailed = jumpIfNoAvailableInput();
2967
2968                     if (needsToUpdateMatchStart) {
2969                         if (!m_pattern.m_body->m_minimumSize)
2970                             setMatchStart(index);
2971                         else {
2972                             move(index, regT0);
2973                             sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
2974                             setMatchStart(regT0);
2975                         }
2976                     }
2977
2978                     // Calculate how much more input the first alternative requires than the minimum
2979                     // for the body as a whole. If no more is needed then we don't need an additional
2980                     // input check here - jump straight back up to the start of the first alternative.
2981                     if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
2982                         jump(beginOp->m_reentry);
2983                     else {
2984                         if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
2985                             add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
2986                         else
2987                             sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
2988                         checkInput().linkTo(beginOp->m_reentry, this);
2989                         jump(firstInputCheckFailed);
2990                     }
2991
2992                     // We jump to here if we iterate to the point that there is insufficient input to
2993                     // run any matches, and need to return a failure state from JIT code.
2994                     matchFailed.link(this);
2995                 }
2996
2997                 lastStickyAlternativeFailures.link(this);
2998                 removeCallFrame();
2999                 generateFailReturn();
3000                 break;
3001             }
3002             case OpBodyAlternativeEnd: {
3003                 // We should never backtrack back into a body disjunction.
3004                 ASSERT(m_backtrackingState.isEmpty());
3005
3006                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
3007                 m_checkedOffset += priorAlternative->m_minimumSize;
3008                 break;
3009             }
3010
3011             // OpSimpleNestedAlternativeBegin/Next/End
3012             // OpNestedAlternativeBegin/Next/End
3013             //
3014             // Generate code for when we backtrack back out of an alternative into
3015             // a Begin or Next node, or when the entry input count check fails. If
3016             // there are more alternatives we need to jump to the next alternative,
3017             // if not we backtrack back out of the current set of parentheses.
3018             //
3019             // In the case of non-simple nested assertions we need to also link the
3020             // 'return address' appropriately to backtrack back out into the correct
3021             // alternative.
3022             case OpSimpleNestedAlternativeBegin:
3023             case OpSimpleNestedAlternativeNext:
3024             case OpNestedAlternativeBegin:
3025             case OpNestedAlternativeNext: {
3026                 YarrOp& nextOp = m_ops[op.m_nextOp];
3027                 bool isBegin = op.m_previousOp == notFound;
3028                 bool isLastAlternative = nextOp.m_nextOp == notFound;
3029                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
3030                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
3031
3032                 // Treat an input check failure the same as a failed match.
3033                 m_backtrackingState.append(op.m_jumps);
3034
3035                 // Set the backtracks to jump to the appropriate place. We may need
3036                 // to link the backtracks in one of three different way depending on
3037                 // the type of alternative we are dealing with:
3038                 //  - A single alternative, with no siblings.
3039                 //  - The last alternative of a set of two or more.
3040                 //  - An alternative other than the last of a set of two or more.
3041                 //
3042                 // In the case of a single alternative on its own, we don't need to
3043                 // jump anywhere - if the alternative fails to match we can just
3044                 // continue to backtrack out of the parentheses without jumping.
3045                 //
3046                 // In the case of the last alternative in a set of more than one, we
3047                 // need to jump to return back out to the beginning. We'll do so by
3048                 // adding a jump to the End node's m_jumps list, and linking this
3049                 // when we come to generate the Begin node. For alternatives other
3050                 // than the last, we need to jump to the next alternative.
3051                 //
3052                 // If the alternative had adjusted the input position we must link
3053                 // backtracking to here, correct, and then jump on. If not we can
3054                 // link the backtracks directly to their destination.
3055                 if (op.m_checkAdjust) {
3056                     // Handle the cases where we need to link the backtracks here.
3057                     m_backtrackingState.link(this);
3058                     sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3059                     if (!isLastAlternative) {
3060                         // An alternative that is not the last should jump to its successor.
3061                         jump(nextOp.m_reentry);
3062                     } else if (!isBegin) {
3063                         // The last of more than one alternatives must jump back to the beginning.
3064                         nextOp.m_jumps.append(jump());
3065                     } else {
3066                         // A single alternative on its own can fall through.
3067                         m_backtrackingState.fallthrough();
3068                     }
3069                 } else {
3070                     // Handle the cases where we can link the backtracks directly to their destinations.
3071                     if (!isLastAlternative) {
3072                         // An alternative that is not the last should jump to its successor.
3073                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
3074                     } else if (!isBegin) {
3075                         // The last of more than one alternatives must jump back to the beginning.
3076                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
3077                     }
3078                     // In the case of a single alternative on its own do nothing - it can fall through.
3079                 }
3080
3081                 // If there is a backtrack jump from a zero length match link it here.
3082                 if (op.m_zeroLengthMatch.isSet())
3083                     m_backtrackingState.append(op.m_zeroLengthMatch);
3084
3085                 // At this point we've handled the backtracking back into this node.
3086                 // Now link any backtracks that need to jump to here.
3087
3088                 // For non-simple alternatives, link the alternative's 'return address'
3089                 // so that we backtrack back out into the previous alternative.
3090