[WTF] Add makeUnique<T>, which ensures T is fast-allocated, makeUnique / makeUniqueWi...
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrJIT.cpp
1 /*
2  * Copyright (C) 2009-2018 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27 #include "YarrJIT.h"
28
29 #include <wtf/ASCIICType.h>
30 #include "LinkBuffer.h"
31 #include "Options.h"
32 #include "VM.h"
33 #include "Yarr.h"
34 #include "YarrCanonicalize.h"
35 #include "YarrDisassembler.h"
36
37 #if ENABLE(YARR_JIT)
38
39 namespace JSC { namespace Yarr {
40
41 template<YarrJITCompileMode compileMode>
42 class YarrGenerator : public YarrJITInfo, private MacroAssembler {
43
44 #if CPU(ARM_THUMB2)
45     static const RegisterID input = ARMRegisters::r0;
46     static const RegisterID index = ARMRegisters::r1;
47     static const RegisterID length = ARMRegisters::r2;
48     static const RegisterID output = ARMRegisters::r3;
49
50     static const RegisterID regT0 = ARMRegisters::r4;
51     static const RegisterID regT1 = ARMRegisters::r5;
52     static const RegisterID initialStart = ARMRegisters::r8;
53
54     static const RegisterID returnRegister = ARMRegisters::r0;
55     static const RegisterID returnRegister2 = ARMRegisters::r1;
56
57 #define HAVE_INITIAL_START_REG
58 #elif CPU(ARM64)
59     // Argument registers
60     static const RegisterID input = ARM64Registers::x0;
61     static const RegisterID index = ARM64Registers::x1;
62     static const RegisterID length = ARM64Registers::x2;
63     static const RegisterID output = ARM64Registers::x3;
64     static const RegisterID freelistRegister = ARM64Registers::x4;
65     static const RegisterID freelistSizeRegister = ARM64Registers::x5;
66
67     // Scratch registers
68     static const RegisterID regT0 = ARM64Registers::x6;
69     static const RegisterID regT1 = ARM64Registers::x7;
70     static const RegisterID regT2 = ARM64Registers::x8;
71     static const RegisterID remainingMatchCount = ARM64Registers::x9;
72     static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
73     static const RegisterID initialStart = ARM64Registers::x11;
74     static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
75     static const RegisterID leadingSurrogateTag = ARM64Registers::x13;
76     static const RegisterID trailingSurrogateTag = ARM64Registers::x14;
77     static const RegisterID endOfStringAddress = ARM64Registers::x15;
78
79     static const RegisterID returnRegister = ARM64Registers::x0;
80     static const RegisterID returnRegister2 = ARM64Registers::x1;
81
82     const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
83 #define HAVE_INITIAL_START_REG
84 #define JIT_UNICODE_EXPRESSIONS
85 #elif CPU(MIPS)
86     static const RegisterID input = MIPSRegisters::a0;
87     static const RegisterID index = MIPSRegisters::a1;
88     static const RegisterID length = MIPSRegisters::a2;
89     static const RegisterID output = MIPSRegisters::a3;
90
91     static const RegisterID regT0 = MIPSRegisters::t4;
92     static const RegisterID regT1 = MIPSRegisters::t5;
93     static const RegisterID initialStart = MIPSRegisters::t6;
94
95     static const RegisterID returnRegister = MIPSRegisters::v0;
96     static const RegisterID returnRegister2 = MIPSRegisters::v1;
97
98 #define HAVE_INITIAL_START_REG
99 #elif CPU(X86)
100     static const RegisterID input = X86Registers::eax;
101     static const RegisterID index = X86Registers::edx;
102     static const RegisterID length = X86Registers::ecx;
103     static const RegisterID output = X86Registers::edi;
104
105     static const RegisterID regT0 = X86Registers::ebx;
106     static const RegisterID regT1 = X86Registers::esi;
107
108     static const RegisterID returnRegister = X86Registers::eax;
109     static const RegisterID returnRegister2 = X86Registers::edx;
110 #elif CPU(X86_64)
111 #if !OS(WINDOWS)
112     // Argument registers
113     static const RegisterID input = X86Registers::edi;
114     static const RegisterID index = X86Registers::esi;
115     static const RegisterID length = X86Registers::edx;
116     static const RegisterID output = X86Registers::ecx;
117     static const RegisterID freelistRegister = X86Registers::r8;
118     static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
119 #else
120     // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
121     // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
122     COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
123     static const RegisterID input = X86Registers::edx;
124     static const RegisterID index = X86Registers::r8;
125     static const RegisterID length = X86Registers::r9;
126     static const RegisterID output = X86Registers::r10;
127 #endif
128
129     // Scratch registers
130     static const RegisterID regT0 = X86Registers::eax;
131 #if !OS(WINDOWS)
132     static const RegisterID regT1 = X86Registers::r9;
133     static const RegisterID regT2 = X86Registers::r10;
134 #else
135     static const RegisterID regT1 = X86Registers::ecx;
136     static const RegisterID regT2 = X86Registers::edi;
137 #endif
138
139     static const RegisterID initialStart = X86Registers::ebx;
140 #if !OS(WINDOWS)
141     static const RegisterID remainingMatchCount = X86Registers::r12;
142 #else
143     static const RegisterID remainingMatchCount = X86Registers::esi;
144 #endif
145     static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
146     static const RegisterID leadingSurrogateTag = X86Registers::r14;
147     static const RegisterID endOfStringAddress = X86Registers::r15;
148
149     static const RegisterID returnRegister = X86Registers::eax;
150     static const RegisterID returnRegister2 = X86Registers::edx;
151
152     const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
153     const TrustedImm32 trailingSurrogateTag = TrustedImm32(0xdc00);
154     const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
155 #define HAVE_INITIAL_START_REG
156 #define JIT_UNICODE_EXPRESSIONS
157 #endif
158
159 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
160     struct ParenContextSizes {
161         size_t m_numSubpatterns;
162         size_t m_frameSlots;
163
164         ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
165             : m_numSubpatterns(numSubpatterns)
166             , m_frameSlots(frameSlots)
167         {
168         }
169
170         size_t numSubpatterns() { return m_numSubpatterns; }
171
172         size_t frameSlots() { return m_frameSlots; }
173     };
174
175     struct ParenContext {
176         struct ParenContext* next;
177         uint32_t begin;
178         uint32_t matchAmount;
179         uintptr_t returnAddress;
180         struct Subpatterns {
181             unsigned start;
182             unsigned end;
183         } subpatterns[0];
184         uintptr_t frameSlots[0];
185
186         static size_t sizeFor(ParenContextSizes& parenContextSizes)
187         {
188             return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
189         }
190
191         static ptrdiff_t nextOffset()
192         {
193             return offsetof(ParenContext, next);
194         }
195
196         static ptrdiff_t beginOffset()
197         {
198             return offsetof(ParenContext, begin);
199         }
200
201         static ptrdiff_t matchAmountOffset()
202         {
203             return offsetof(ParenContext, matchAmount);
204         }
205
206         static ptrdiff_t returnAddressOffset()
207         {
208             return offsetof(ParenContext, returnAddress);
209         }
210
211         static ptrdiff_t subpatternOffset(size_t subpattern)
212         {
213             return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
214         }
215
216         static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
217         {
218             return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
219         }
220     };
221
222     void initParenContextFreeList()
223     {
224         RegisterID parenContextPointer = regT0;
225         RegisterID nextParenContextPointer = regT2;
226
227         size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);
228
229         parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);
230
231         if (parenContextSize > VM::patternContextBufferSize) {
232             m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
233             return;
234         }
235
236         Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
237         move(freelistRegister, parenContextPointer);
238         addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
239         addPtr(freelistRegister, freelistSizeRegister);
240         subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);
241
242         Label loopTop(this);
243         Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
244         storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
245         move(nextParenContextPointer, parenContextPointer);
246         addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
247         jump(loopTop);
248
249         initDone.link(this);
250         storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
251         emptyFreeList.link(this);
252     }
253
254     void allocateParenContext(RegisterID result)
255     {
256         m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
257         sub32(TrustedImm32(1), remainingMatchCount);
258         m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
259         move(freelistRegister, result);
260         loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
261     }
262
263     void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
264     {
265         loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
266         storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
267         move(headPtrRegister, freelistRegister);
268     }
269
270     void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
271     {
272         store32(index, Address(parenContextReg, ParenContext::beginOffset()));
273         loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
274         store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
275         loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
276         storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
277         if (compileMode == IncludeSubpatterns) {
278             for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
279                 loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
280                 storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
281                 clearSubpatternStart(subpattern);
282             }
283         }
284         subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
285         for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
286             loadFromFrame(frameLocation, tempReg);
287             storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
288         }
289     }
290
291     void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
292     {
293         load32(Address(parenContextReg, ParenContext::beginOffset()), index);
294         storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
295         load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
296         storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
297         loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
298         storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
299         if (compileMode == IncludeSubpatterns) {
300             for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
301                 loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
302                 storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
303             }
304         }
305         subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
306         for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
307             loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
308             storeToFrame(tempReg, frameLocation);
309         }
310     }
311 #endif
312
313     void optimizeAlternative(PatternAlternative* alternative)
314     {
315         if (!alternative->m_terms.size())
316             return;
317
318         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
319             PatternTerm& term = alternative->m_terms[i];
320             PatternTerm& nextTerm = alternative->m_terms[i + 1];
321
322             // We can move BMP only character classes after fixed character terms.
323             if ((term.type == PatternTerm::TypeCharacterClass)
324                 && (term.quantityType == QuantifierFixedCount)
325                 && (!m_decodeSurrogatePairs || (term.characterClass->hasOneCharacterSize() && !term.m_invert))
326                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
327                 && (nextTerm.quantityType == QuantifierFixedCount)) {
328                 PatternTerm termCopy = term;
329                 alternative->m_terms[i] = nextTerm;
330                 alternative->m_terms[i + 1] = termCopy;
331             }
332         }
333     }
334
335     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
336     {
337         do {
338             // pick which range we're going to generate
339             int which = count >> 1;
340             char lo = ranges[which].begin;
341             char hi = ranges[which].end;
342
343             // check if there are any ranges or matches below lo.  If not, just jl to failure -
344             // if there is anything else to check, check that first, if it falls through jmp to failure.
345             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
346                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
347
348                 // generate code for all ranges before this one
349                 if (which)
350                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
351
352                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
353                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
354                     ++*matchIndex;
355                 }
356                 failures.append(jump());
357
358                 loOrAbove.link(this);
359             } else if (which) {
360                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
361
362                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
363                 failures.append(jump());
364
365                 loOrAbove.link(this);
366             } else
367                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
368
369             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
370                 ++*matchIndex;
371
372             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
373             // fall through to here, the value is above hi.
374
375             // shuffle along & loop around if there are any more matches to handle.
376             unsigned next = which + 1;
377             ranges += next;
378             count -= next;
379         } while (count);
380     }
381
382     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
383     {
384         if (charClass->m_table && !m_decodeSurrogatePairs) {
385             ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
386             matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
387             return;
388         }
389
390         JumpList unicodeFail;
391         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
392             JumpList isAscii;
393             if (charClass->m_matches.size() || charClass->m_ranges.size())
394                 isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f)));
395
396             if (charClass->m_matchesUnicode.size()) {
397                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
398                     UChar32 ch = charClass->m_matchesUnicode[i];
399                     matchDest.append(branch32(Equal, character, Imm32(ch)));
400                 }
401             }
402
403             if (charClass->m_rangesUnicode.size()) {
404                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
405                     UChar32 lo = charClass->m_rangesUnicode[i].begin;
406                     UChar32 hi = charClass->m_rangesUnicode[i].end;
407
408                     Jump below = branch32(LessThan, character, Imm32(lo));
409                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
410                     below.link(this);
411                 }
412             }
413
414             if (charClass->m_matches.size() || charClass->m_ranges.size())
415                 unicodeFail = jump();
416             isAscii.link(this);
417         }
418
419         if (charClass->m_ranges.size()) {
420             unsigned matchIndex = 0;
421             JumpList failures;
422             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
423             while (matchIndex < charClass->m_matches.size())
424                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
425
426             failures.link(this);
427         } else if (charClass->m_matches.size()) {
428             // optimization: gather 'a','A' etc back together, can mask & test once.
429             Vector<char> matchesAZaz;
430
431             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
432                 char ch = charClass->m_matches[i];
433                 if (m_pattern.ignoreCase()) {
434                     if (isASCIILower(ch)) {
435                         matchesAZaz.append(ch);
436                         continue;
437                     }
438                     if (isASCIIUpper(ch))
439                         continue;
440                 }
441                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
442             }
443
444             if (unsigned countAZaz = matchesAZaz.size()) {
445                 or32(TrustedImm32(32), character);
446                 for (unsigned i = 0; i < countAZaz; ++i)
447                     matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
448             }
449         }
450
451         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
452             unicodeFail.link(this);
453     }
454
455 #ifdef JIT_UNICODE_EXPRESSIONS
456     void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failures, const RegisterID character)
457     {
458         ASSERT(term->type == PatternTerm::TypeCharacterClass);
459
460         if (term->characterClass->hasOneCharacterSize() && !term->invert())
461             add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
462         else {
463             add32(TrustedImm32(1), index);
464             failures.append(atEndOfInput());
465             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
466             add32(TrustedImm32(1), index);
467             isBMPChar.link(this);
468         }
469     }
470 #endif
471
472     // Jumps if input not available; will have (incorrectly) incremented already!
473     Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
474     {
475         if (countToCheck)
476             add32(Imm32(countToCheck), index);
477         return branch32(Above, index, length);
478     }
479
480     Jump jumpIfAvailableInput(unsigned countToCheck)
481     {
482         add32(Imm32(countToCheck), index);
483         return branch32(BelowOrEqual, index, length);
484     }
485
486     Jump checkNotEnoughInput(RegisterID additionalAmount)
487     {
488         add32(index, additionalAmount);
489         return branch32(Above, additionalAmount, length);
490     }
491
492     Jump checkInput()
493     {
494         return branch32(BelowOrEqual, index, length);
495     }
496
497     Jump atEndOfInput()
498     {
499         return branch32(Equal, index, length);
500     }
501
502     Jump notAtEndOfInput()
503     {
504         return branch32(NotEqual, index, length);
505     }
506
507     BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
508     {
509         RegisterID base = input;
510
511         // BaseIndex() addressing can take a int32_t offset. Given that we can have a regular
512         // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
513         // for addressing in extreme cases where we might underflow. Therefore we check to see if
514         // negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
515         // If so, we do our own address calculating by adjusting the base, using the result register
516         // as a temp address register.
517         unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff;
518         unsigned offsetAdjustAmount = 0x40000000;
519         if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
520             base = tempReg;
521             move(input, base);
522             while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
523                 subPtr(TrustedImm32(offsetAdjustAmount), base);
524                 if (m_charSize != Char8)
525                     subPtr(TrustedImm32(offsetAdjustAmount), base);
526                 negativeCharacterOffset -= offsetAdjustAmount;
527             }
528         }
529
530         Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));
531
532         if (m_charSize == Char8)
533             return BaseIndex(input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());
534
535         return BaseIndex(input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
536     }
537
538 #ifdef JIT_UNICODE_EXPRESSIONS
539     void tryReadUnicodeCharImpl(RegisterID resultReg)
540     {
541         ASSERT(m_charSize == Char16);
542
543         JumpList notUnicode;
544
545         load16Unaligned(regUnicodeInputAndTrail, resultReg);
546         and32(surrogateTagMask, resultReg, regT2);
547         notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
548         addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
549         notUnicode.append(branchPtr(AboveOrEqual, regUnicodeInputAndTrail, endOfStringAddress));
550         load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
551         and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
552         notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
553         sub32(leadingSurrogateTag, resultReg);
554         sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
555         lshift32(TrustedImm32(10), resultReg);
556         or32(regUnicodeInputAndTrail, resultReg);
557         add32(supplementaryPlanesBase, resultReg);
558         notUnicode.link(this);
559     }
560
561     void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
562     {
563         ASSERT(m_charSize == Char16);
564
565         getEffectiveAddress(address, regUnicodeInputAndTrail);
566
567         if (resultReg == regT0)
568             m_tryReadUnicodeCharacterCalls.append(nearCall());
569         else
570             tryReadUnicodeCharImpl(resultReg);
571     }
572 #endif
573
574     void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
575     {
576         BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
577         
578         if (m_charSize == Char8)
579             load8(address, resultReg);
580         else
581             load16Unaligned(address, resultReg);
582     }
583     
584     void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
585     {
586         BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
587
588         if (m_charSize == Char8)
589             load8(address, resultReg);
590 #ifdef JIT_UNICODE_EXPRESSIONS
591         else if (m_decodeSurrogatePairs)
592             tryReadUnicodeChar(address, resultReg);
593 #endif
594         else
595             load16Unaligned(address, resultReg);
596     }
597
598     Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
599     {
600         readCharacter(negativeCharacterOffset, character);
601
602         // For case-insesitive compares, non-ascii characters that have different
603         // upper & lower case representations are converted to a character class.
604         ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
605         if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
606             or32(TrustedImm32(0x20), character);
607             ch |= 0x20;
608         }
609
610         return branch32(NotEqual, character, Imm32(ch));
611     }
612     
613     void storeToFrame(RegisterID reg, unsigned frameLocation)
614     {
615         poke(reg, frameLocation);
616     }
617
618     void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
619     {
620         poke(imm, frameLocation);
621     }
622
623 #if CPU(ARM64) || CPU(X86_64)
624     void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
625     {
626         poke(imm, frameLocation);
627     }
628 #endif
629
630     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
631     {
632         return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*)));
633     }
634
635     void loadFromFrame(unsigned frameLocation, RegisterID reg)
636     {
637         peek(reg, frameLocation);
638     }
639
640     void loadFromFrameAndJump(unsigned frameLocation)
641     {
642         farJump(Address(stackPointerRegister, frameLocation * sizeof(void*)), YarrBacktrackPtrTag);
643     }
644
645     unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
646     {
647         if (!callFrameSize)
648             return 0;
649
650         callFrameSize *= sizeof(void*);
651         if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
652             CRASH();
653         callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
654         return callFrameSize;
655     }
656     void initCallFrame()
657     {
658         unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
659         if (callFrameSizeInBytes) {
660 #if CPU(X86_64) || CPU(ARM64)
661             if (Options::zeroStackFrame()) {
662                 // We need to start from the stack pointer, because we could have spilled callee saves
663                 move(stackPointerRegister, regT0);
664                 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
665                 if (callFrameSizeInBytes <= 128) {
666                     for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
667                         storePtr(TrustedImm32(0), Address(regT0, -8 - offset));
668                 } else {
669                     Label zeroLoop = label();
670                     subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0);
671 #if CPU(ARM64)
672                     storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
673 #else
674                     storePtr(TrustedImm32(0), Address(regT0));
675                     storePtr(TrustedImm32(0), Address(regT0, sizeof(intptr_t)));
676 #endif
677                     branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
678                 }
679             } else
680 #endif
681                 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
682
683         }
684     }
685     void removeCallFrame()
686     {
687         unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
688         if (callFrameSizeInBytes)
689             addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
690     }
691
692     void generateFailReturn()
693     {
694         move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
695         move(TrustedImm32(0), returnRegister2);
696         generateReturn();
697     }
698
699     void generateJITFailReturn()
700     {
701         if (m_abortExecution.empty() && m_hitMatchLimit.empty())
702             return;
703
704         JumpList finishExiting;
705         if (!m_abortExecution.empty()) {
706             m_abortExecution.link(this);
707             move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister);
708             finishExiting.append(jump());
709         }
710
711         if (!m_hitMatchLimit.empty()) {
712             m_hitMatchLimit.link(this);
713             move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister);
714         }
715
716         finishExiting.link(this);
717         removeCallFrame();
718         move(TrustedImm32(0), returnRegister2);
719         generateReturn();
720     }
721
722     // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
723     void setSubpatternStart(RegisterID reg, unsigned subpattern)
724     {
725         ASSERT(subpattern);
726         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
727         store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
728     }
729     void setSubpatternEnd(RegisterID reg, unsigned subpattern)
730     {
731         ASSERT(subpattern);
732         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
733         store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
734     }
735     void clearSubpatternStart(unsigned subpattern)
736     {
737         ASSERT(subpattern);
738         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
739         store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
740     }
741
742     void clearMatches(unsigned subpattern, unsigned lastSubpattern)
743     {
744         for (; subpattern <= lastSubpattern; subpattern++)
745             clearSubpatternStart(subpattern);
746     }
747
748     // We use one of three different strategies to track the start of the current match,
749     // while matching.
750     // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
751     //    at the end of matching. This is irrespective of compileMode, and in this case
752     //    these methods should never be called.
753     // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
754     //    vector, store the match start in the output vector.
755     // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
756     //    in this register.
757     void setMatchStart(RegisterID reg)
758     {
759         ASSERT(!m_pattern.m_body->m_hasFixedSize);
760         if (compileMode == IncludeSubpatterns)
761             store32(reg, output);
762         else
763             move(reg, output);
764     }
765     void getMatchStart(RegisterID reg)
766     {
767         ASSERT(!m_pattern.m_body->m_hasFixedSize);
768         if (compileMode == IncludeSubpatterns)
769             load32(output, reg);
770         else
771             move(output, reg);
772     }
773
774     enum YarrOpCode : uint8_t {
775         // These nodes wrap body alternatives - those in the main disjunction,
776         // rather than subpatterns or assertions. These are chained together in
777         // a doubly linked list, with a 'begin' node for the first alternative,
778         // a 'next' node for each subsequent alternative, and an 'end' node at
779         // the end. In the case of repeating alternatives, the 'end' node also
780         // has a reference back to 'begin'.
781         OpBodyAlternativeBegin,
782         OpBodyAlternativeNext,
783         OpBodyAlternativeEnd,
784         // Similar to the body alternatives, but used for subpatterns with two
785         // or more alternatives.
786         OpNestedAlternativeBegin,
787         OpNestedAlternativeNext,
788         OpNestedAlternativeEnd,
789         // Used for alternatives in subpatterns where there is only a single
790         // alternative (backtracking is easier in these cases), or for alternatives
791         // which never need to be backtracked (those in parenthetical assertions,
792         // terminal subpatterns).
793         OpSimpleNestedAlternativeBegin,
794         OpSimpleNestedAlternativeNext,
795         OpSimpleNestedAlternativeEnd,
796         // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
797         OpParenthesesSubpatternOnceBegin,
798         OpParenthesesSubpatternOnceEnd,
799         // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
800         OpParenthesesSubpatternTerminalBegin,
801         OpParenthesesSubpatternTerminalEnd,
802         // Used to wrap generic captured matches
803         OpParenthesesSubpatternBegin,
804         OpParenthesesSubpatternEnd,
805         // Used to wrap parenthetical assertions.
806         OpParentheticalAssertionBegin,
807         OpParentheticalAssertionEnd,
808         // Wraps all simple terms (pattern characters, character classes).
809         OpTerm,
810         // Where an expression contains only 'once through' body alternatives
811         // and no repeating ones, this op is used to return match failure.
812         OpMatchFailed
813     };
814
815     // This structure is used to hold the compiled opcode information,
816     // including reference back to the original PatternTerm/PatternAlternatives,
817     // and JIT compilation data structures.
818     struct YarrOp {
819         explicit YarrOp(PatternTerm* term)
820             : m_term(term)
821             , m_op(OpTerm)
822             , m_isDeadCode(false)
823         {
824         }
825
826         explicit YarrOp(YarrOpCode op)
827             : m_op(op)
828             , m_isDeadCode(false)
829         {
830         }
831
832         // For alternatives, this holds the PatternAlternative and doubly linked
833         // references to this alternative's siblings. In the case of the
834         // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
835         // m_nextOp will reference the OpBodyAlternativeBegin node of the first
836         // repeating alternative.
837         PatternAlternative* m_alternative;
838         size_t m_previousOp;
839         size_t m_nextOp;
840         
841         // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
842         PatternTerm* m_term;
843         YarrOpCode m_op;
844
845         // Used to record a set of Jumps out of the generated code, typically
846         // used for jumps out to backtracking code, and a single reentry back
847         // into the code for a node (likely where a backtrack will trigger
848         // rematching).
849         Label m_reentry;
850         JumpList m_jumps;
851
852         // Used for backtracking when the prior alternative did not consume any
853         // characters but matched.
854         Jump m_zeroLengthMatch;
855
856         // This flag is used to null out the second pattern character, when
857         // two are fused to match a pair together.
858         bool m_isDeadCode;
859
860         // Currently used in the case of some of the more complex management of
861         // 'm_checkedOffset', to cache the offset used in this alternative, to avoid
862         // recalculating it.
863         Checked<unsigned> m_checkAdjust;
864
865         // Used by OpNestedAlternativeNext/End to hold the pointer to the
866         // value that will be pushed into the pattern's frame to return to,
867         // upon backtracking back into the disjunction.
868         DataLabelPtr m_returnAddress;
869     };
870
871     // BacktrackingState
872     // This class encapsulates information about the state of code generation
873     // whilst generating the code for backtracking, when a term fails to match.
874     // Upon entry to code generation of the backtracking code for a given node,
875     // the Backtracking state will hold references to all control flow sources
876     // that are outputs in need of further backtracking from the prior node
877     // generated (which is the subsequent operation in the regular expression,
878     // and in the m_ops Vector, since we generated backtracking backwards).
879     // These references to control flow take the form of:
880     //  - A jump list of jumps, to be linked to code that will backtrack them
881     //    further.
882     //  - A set of DataLabelPtr values, to be populated with values to be
883     //    treated effectively as return addresses backtracking into complex
884     //    subpatterns.
885     //  - A flag indicating that the current sequence of generated code up to
886     //    this point requires backtracking.
887     class BacktrackingState {
888     public:
889         BacktrackingState()
890             : m_pendingFallthrough(false)
891         {
892         }
893
894         // Add a jump or jumps, a return address, or set the flag indicating
895         // that the current 'fallthrough' control flow requires backtracking.
896         void append(const Jump& jump)
897         {
898             m_laterFailures.append(jump);
899         }
900         void append(JumpList& jumpList)
901         {
902             m_laterFailures.append(jumpList);
903         }
904         void append(const DataLabelPtr& returnAddress)
905         {
906             m_pendingReturns.append(returnAddress);
907         }
908         void fallthrough()
909         {
910             ASSERT(!m_pendingFallthrough);
911             m_pendingFallthrough = true;
912         }
913
914         // These methods clear the backtracking state, either linking to the
915         // current location, a provided label, or copying the backtracking out
916         // to a JumpList. All actions may require code generation to take place,
917         // and as such are passed a pointer to the assembler.
918         void link(MacroAssembler* assembler)
919         {
920             if (m_pendingReturns.size()) {
921                 Label here(assembler);
922                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
923                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
924                 m_pendingReturns.clear();
925             }
926             m_laterFailures.link(assembler);
927             m_laterFailures.clear();
928             m_pendingFallthrough = false;
929         }
930         void linkTo(Label label, MacroAssembler* assembler)
931         {
932             if (m_pendingReturns.size()) {
933                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
934                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
935                 m_pendingReturns.clear();
936             }
937             if (m_pendingFallthrough)
938                 assembler->jump(label);
939             m_laterFailures.linkTo(label, assembler);
940             m_laterFailures.clear();
941             m_pendingFallthrough = false;
942         }
943         void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
944         {
945             if (m_pendingReturns.size()) {
946                 Label here(assembler);
947                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
948                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
949                 m_pendingReturns.clear();
950                 m_pendingFallthrough = true;
951             }
952             if (m_pendingFallthrough)
953                 jumpList.append(assembler->jump());
954             jumpList.append(m_laterFailures);
955             m_laterFailures.clear();
956             m_pendingFallthrough = false;
957         }
958
959         bool isEmpty()
960         {
961             return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
962         }
963
964         // Called at the end of code generation to link all return addresses.
965         void linkDataLabels(LinkBuffer& linkBuffer)
966         {
967             ASSERT(isEmpty());
968             for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
969                 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf<YarrBacktrackPtrTag>(m_backtrackRecords[i].m_backtrackLocation));
970         }
971
972     private:
973         struct ReturnAddressRecord {
974             ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
975                 : m_dataLabel(dataLabel)
976                 , m_backtrackLocation(backtrackLocation)
977             {
978             }
979
980             DataLabelPtr m_dataLabel;
981             Label m_backtrackLocation;
982         };
983
984         JumpList m_laterFailures;
985         bool m_pendingFallthrough;
986         Vector<DataLabelPtr, 4> m_pendingReturns;
987         Vector<ReturnAddressRecord, 4> m_backtrackRecords;
988     };
989
990     // Generation methods:
991     // ===================
992
993     // This method provides a default implementation of backtracking common
994     // to many terms; terms commonly jump out of the forwards  matching path
995     // on any failed conditions, and add these jumps to the m_jumps list. If
996     // no special handling is required we can often just backtrack to m_jumps.
997     void backtrackTermDefault(size_t opIndex)
998     {
999         YarrOp& op = m_ops[opIndex];
1000         m_backtrackingState.append(op.m_jumps);
1001     }
1002
1003     void generateAssertionBOL(size_t opIndex)
1004     {
1005         YarrOp& op = m_ops[opIndex];
1006         PatternTerm* term = op.m_term;
1007
1008         if (m_pattern.multiline()) {
1009             const RegisterID character = regT0;
1010
1011             JumpList matchDest;
1012             if (!term->inputPosition)
1013                 matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));
1014
1015             readCharacter(m_checkedOffset - term->inputPosition + 1, character);
1016             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1017             op.m_jumps.append(jump());
1018
1019             matchDest.link(this);
1020         } else {
1021             // Erk, really should poison out these alternatives early. :-/
1022             if (term->inputPosition)
1023                 op.m_jumps.append(jump());
1024             else
1025                 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
1026         }
1027     }
1028     void backtrackAssertionBOL(size_t opIndex)
1029     {
1030         backtrackTermDefault(opIndex);
1031     }
1032
1033     void generateAssertionEOL(size_t opIndex)
1034     {
1035         YarrOp& op = m_ops[opIndex];
1036         PatternTerm* term = op.m_term;
1037
1038         if (m_pattern.multiline()) {
1039             const RegisterID character = regT0;
1040
1041             JumpList matchDest;
1042             if (term->inputPosition == m_checkedOffset.unsafeGet())
1043                 matchDest.append(atEndOfInput());
1044
1045             readCharacter(m_checkedOffset - term->inputPosition, character);
1046             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1047             op.m_jumps.append(jump());
1048
1049             matchDest.link(this);
1050         } else {
1051             if (term->inputPosition == m_checkedOffset.unsafeGet())
1052                 op.m_jumps.append(notAtEndOfInput());
1053             // Erk, really should poison out these alternatives early. :-/
1054             else
1055                 op.m_jumps.append(jump());
1056         }
1057     }
1058     void backtrackAssertionEOL(size_t opIndex)
1059     {
1060         backtrackTermDefault(opIndex);
1061     }
1062
1063     // Also falls though on nextIsNotWordChar.
1064     void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
1065     {
1066         YarrOp& op = m_ops[opIndex];
1067         PatternTerm* term = op.m_term;
1068
1069         const RegisterID character = regT0;
1070
1071         if (term->inputPosition == m_checkedOffset.unsafeGet())
1072             nextIsNotWordChar.append(atEndOfInput());
1073
1074         readCharacter(m_checkedOffset - term->inputPosition, character);
1075
1076         CharacterClass* wordcharCharacterClass;
1077
1078         if (m_unicodeIgnoreCase)
1079             wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1080         else
1081             wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1082
1083         matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass);
1084     }
1085
1086     void generateAssertionWordBoundary(size_t opIndex)
1087     {
1088         YarrOp& op = m_ops[opIndex];
1089         PatternTerm* term = op.m_term;
1090
1091         const RegisterID character = regT0;
1092
1093         Jump atBegin;
1094         JumpList matchDest;
1095         if (!term->inputPosition)
1096             atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
1097         readCharacter(m_checkedOffset - term->inputPosition + 1, character);
1098
1099         CharacterClass* wordcharCharacterClass;
1100
1101         if (m_unicodeIgnoreCase)
1102             wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1103         else
1104             wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1105
1106         matchCharacterClass(character, matchDest, wordcharCharacterClass);
1107         if (!term->inputPosition)
1108             atBegin.link(this);
1109
1110         // We fall through to here if the last character was not a wordchar.
1111         JumpList nonWordCharThenWordChar;
1112         JumpList nonWordCharThenNonWordChar;
1113         if (term->invert()) {
1114             matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
1115             nonWordCharThenWordChar.append(jump());
1116         } else {
1117             matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
1118             nonWordCharThenNonWordChar.append(jump());
1119         }
1120         op.m_jumps.append(nonWordCharThenNonWordChar);
1121
1122         // We jump here if the last character was a wordchar.
1123         matchDest.link(this);
1124         JumpList wordCharThenWordChar;
1125         JumpList wordCharThenNonWordChar;
1126         if (term->invert()) {
1127             matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
1128             wordCharThenWordChar.append(jump());
1129         } else {
1130             matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
1131             // This can fall-though!
1132         }
1133
1134         op.m_jumps.append(wordCharThenWordChar);
1135
1136         nonWordCharThenWordChar.link(this);
1137         wordCharThenNonWordChar.link(this);
1138     }
1139     void backtrackAssertionWordBoundary(size_t opIndex)
1140     {
1141         backtrackTermDefault(opIndex);
1142     }
1143
1144 #if ENABLE(YARR_JIT_BACKREFERENCES)
1145     void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
1146     {
1147         YarrOp& op = m_ops[opIndex];
1148         PatternTerm* term = op.m_term;
1149         unsigned subpatternId = term->backReferenceSubpatternId;
1150
1151         Label loop(this);
1152
1153         readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
1154         readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);
1155     
1156         if (!m_pattern.ignoreCase())
1157             characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1158         else {
1159             Jump charactersMatch = branch32(Equal, character, patternCharacter);
1160             ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1161             load16(characterTableEntry, character);
1162             ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1163             load16(patternTableEntry, patternCharacter);
1164             characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1165             charactersMatch.link(this);
1166         }
1167
1168         
1169         add32(TrustedImm32(1), index);
1170         add32(TrustedImm32(1), patternIndex);
1171         
1172         branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
1173     }
1174
1175     void generateBackReference(size_t opIndex)
1176     {
1177         YarrOp& op = m_ops[opIndex];
1178         PatternTerm* term = op.m_term;
1179
1180         if (m_pattern.ignoreCase() && m_charSize != Char8) {
1181             m_failureReason = JITFailureReason::BackReference;
1182             return;
1183         }
1184
1185         unsigned subpatternId = term->backReferenceSubpatternId;
1186         unsigned parenthesesFrameLocation = term->frameLocation;
1187
1188         const RegisterID characterOrTemp = regT0;
1189         const RegisterID patternIndex = regT1;
1190         const RegisterID patternTemp = regT2;
1191
1192         storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1193         if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
1194             storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1195
1196         JumpList matches;
1197
1198         if (term->quantityType != QuantifierNonGreedy) {
1199             load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1200             load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1201
1202             // An empty match is successful without consuming characters
1203             if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
1204                 matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
1205                 matches.append(branch32(Equal, patternIndex, patternTemp));
1206             } else {
1207                 Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
1208                 Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1209                 zeroLengthMatch.link(this);
1210                 storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1211                 matches.append(jump());
1212                 tryNonZeroMatch.link(this);
1213             }
1214         }
1215
1216         switch (term->quantityType) {
1217         case QuantifierFixedCount: {
1218             Label outerLoop(this);
1219
1220             // PatternTemp should contain pattern end index at this point
1221             sub32(patternIndex, patternTemp);
1222             op.m_jumps.append(checkNotEnoughInput(patternTemp));
1223
1224             matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);
1225
1226             if (term->quantityMaxCount != 1) {
1227                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
1228                 add32(TrustedImm32(1), characterOrTemp);
1229                 storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1230                 matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
1231                 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1232                 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1233                 jump(outerLoop);
1234             }
1235             matches.link(this);
1236             break;
1237         }
1238
1239         case QuantifierGreedy: {
1240             JumpList incompleteMatches;
1241
1242             Label outerLoop(this);
1243
1244             // PatternTemp should contain pattern end index at this point
1245             sub32(patternIndex, patternTemp);
1246             matches.append(checkNotEnoughInput(patternTemp));
1247
1248             matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
1249
1250             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
1251             add32(TrustedImm32(1), characterOrTemp);
1252             storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1253             if (term->quantityMaxCount != quantifyInfinite)
1254                 matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
1255             load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1256             load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1257
1258             // Store current index in frame for restoring after a partial match
1259             storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1260             jump(outerLoop);
1261
1262             incompleteMatches.link(this);
1263             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1264
1265             matches.link(this);
1266             op.m_reentry = label();
1267             break;
1268         }
1269
1270         case QuantifierNonGreedy: {
1271             JumpList incompleteMatches;
1272
1273             matches.append(jump());
1274
1275             op.m_reentry = label();
1276
1277             load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1278             load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1279
1280             // An empty match is successful without consuming characters
1281             Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
1282             Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1283             zeroLengthMatch.link(this);
1284             storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1285             matches.append(jump());
1286             tryNonZeroMatch.link(this);
1287
1288             // Check if we have input remaining to match
1289             sub32(patternIndex, patternTemp);
1290             matches.append(checkNotEnoughInput(patternTemp));
1291
1292             storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1293
1294             matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
1295
1296             matches.append(jump());
1297
1298             incompleteMatches.link(this);
1299             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1300
1301             matches.link(this);
1302             break;
1303         }
1304         }
1305     }
1306     void backtrackBackReference(size_t opIndex)
1307     {
1308         YarrOp& op = m_ops[opIndex];
1309         PatternTerm* term = op.m_term;
1310
1311         unsigned subpatternId = term->backReferenceSubpatternId;
1312
1313         m_backtrackingState.link(this);
1314         op.m_jumps.link(this);
1315
1316         JumpList failures;
1317
1318         unsigned parenthesesFrameLocation = term->frameLocation;
1319         switch (term->quantityType) {
1320         case QuantifierFixedCount:
1321             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1322             break;
1323
1324         case QuantifierGreedy: {
1325             const RegisterID matchAmount = regT0;
1326             const RegisterID patternStartIndex = regT1;
1327             const RegisterID patternEndIndexOrLen = regT2;
1328
1329             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
1330             failures.append(branchTest32(Zero, matchAmount));
1331
1332             load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
1333             load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
1334             sub32(patternStartIndex, patternEndIndexOrLen);
1335             sub32(patternEndIndexOrLen, index);
1336
1337             sub32(TrustedImm32(1), matchAmount);
1338             storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1339             jump(op.m_reentry);
1340             break;
1341         }
1342
1343         case QuantifierNonGreedy: {
1344             const RegisterID matchAmount = regT0;
1345
1346             failures.append(atEndOfInput());
1347             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
1348             if (term->quantityMaxCount != quantifyInfinite)
1349                 failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
1350             add32(TrustedImm32(1), matchAmount);
1351             storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1352             jump(op.m_reentry);
1353             break;
1354         }
1355         }
1356         failures.link(this);
1357         m_backtrackingState.fallthrough();
1358     }
1359 #endif
1360
1361     void generatePatternCharacterOnce(size_t opIndex)
1362     {
1363         YarrOp& op = m_ops[opIndex];
1364
1365         if (op.m_isDeadCode)
1366             return;
1367         
1368         // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
1369         // node, so there must always be at least one more node.
1370         ASSERT(opIndex + 1 < m_ops.size());
1371         YarrOp* nextOp = &m_ops[opIndex + 1];
1372
1373         PatternTerm* term = op.m_term;
1374         UChar32 ch = term->patternCharacter;
1375
1376         if (!isLatin1(ch) && (m_charSize == Char8)) {
1377             // Have a 16 bit pattern character and an 8 bit string - short circuit
1378             op.m_jumps.append(jump());
1379             return;
1380         }
1381
1382         const RegisterID character = regT0;
1383 #if CPU(X86_64) || CPU(ARM64)
1384         unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
1385 #else
1386         unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
1387 #endif
1388         uint64_t ignoreCaseMask = 0;
1389 #if CPU(BIG_ENDIAN)
1390         uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
1391 #else
1392         uint64_t allCharacters = ch;
1393 #endif
1394         unsigned numberCharacters;
1395         unsigned startTermPosition = term->inputPosition;
1396
1397         // For case-insesitive compares, non-ascii characters that have different
1398         // upper & lower case representations are converted to a character class.
1399         ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1400
1401         if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1402 #if CPU(BIG_ENDIAN)
1403             ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
1404 #else
1405             ignoreCaseMask |= 32;
1406 #endif
1407         }
1408
1409         for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
1410             PatternTerm* nextTerm = nextOp->m_term;
1411
1412             // YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
1413             // Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
1414             if (nextTerm->type != PatternTerm::TypePatternCharacter
1415                 || nextTerm->quantityType != QuantifierFixedCount
1416                 || nextTerm->quantityMaxCount != 1
1417                 || nextTerm->inputPosition != (startTermPosition + numberCharacters)
1418                 || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs))
1419                 break;
1420
1421             nextOp->m_isDeadCode = true;
1422
1423 #if CPU(BIG_ENDIAN)
1424             int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
1425 #else
1426             int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
1427 #endif
1428
1429             UChar32 currentCharacter = nextTerm->patternCharacter;
1430
1431             if (!isLatin1(currentCharacter) && (m_charSize == Char8)) {
1432                 // Have a 16 bit pattern character and an 8 bit string - short circuit
1433                 op.m_jumps.append(jump());
1434                 return;
1435             }
1436
1437             // For case-insesitive compares, non-ascii characters that have different
1438             // upper & lower case representations are converted to a character class.
1439             ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));
1440
1441             allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);
1442
1443             if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
1444                 ignoreCaseMask |= 32ULL << shiftAmount;
1445         }
1446
1447         if (m_decodeSurrogatePairs)
1448             op.m_jumps.append(jumpIfNoAvailableInput());
1449
1450         if (m_charSize == Char8) {
1451             auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1452                 op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
1453             };
1454
1455             auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
1456                 load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
1457                 if (mask)
1458                     or32(Imm32(mask), character);
1459                 op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1460             };
1461
1462             auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1463                 if (mask) {
1464                     load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
1465                     if (mask)
1466                         or32(Imm32(mask), character);
1467                     op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1468                     return;
1469                 }
1470                 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
1471             };
1472
1473 #if CPU(X86_64) || CPU(ARM64)
1474             auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1475                 load64(negativeOffsetIndexedAddress(offset, character), character);
1476                 if (mask)
1477                     or64(TrustedImm64(mask), character);
1478                 op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
1479             };
1480 #endif
1481
1482             switch (numberCharacters) {
1483             case 1:
1484                 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1485                 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1486                 return;
1487             case 2: {
1488                 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1489                 return;
1490             }
1491             case 3: {
1492                 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1493                 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
1494                 return;
1495             }
1496             case 4: {
1497                 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1498                 return;
1499             }
1500 #if CPU(X86_64) || CPU(ARM64)
1501             case 5: {
1502                 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1503                 check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
1504                 return;
1505             }
1506             case 6: {
1507                 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1508                 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1509                 return;
1510             }
1511             case 7: {
1512                 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1513                 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1514                 check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
1515                 return;
1516             }
1517             case 8: {
1518                 check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1519                 return;
1520             }
1521 #endif
1522             }
1523         } else {
1524             auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1525                 op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
1526             };
1527
1528             auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1529                 if (mask) {
1530                     load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
1531                     if (mask)
1532                         or32(Imm32(mask), character);
1533                     op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1534                     return;
1535                 }
1536                 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
1537             };
1538
1539 #if CPU(X86_64) || CPU(ARM64)
1540             auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1541                 load64(negativeOffsetIndexedAddress(offset, character), character);
1542                 if (mask)
1543                     or64(TrustedImm64(mask), character);
1544                 op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
1545             };
1546 #endif
1547
1548             switch (numberCharacters) {
1549             case 1:
1550                 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1551                 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1552                 return;
1553             case 2:
1554                 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1555                 return;
1556 #if CPU(X86_64) || CPU(ARM64)
1557             case 3:
1558                 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1559                 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
1560                 return;
1561             case 4:
1562                 check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1563                 return;
1564 #endif
1565             }
1566         }
1567     }
1568     void backtrackPatternCharacterOnce(size_t opIndex)
1569     {
1570         backtrackTermDefault(opIndex);
1571     }
1572
1573     void generatePatternCharacterFixed(size_t opIndex)
1574     {
1575         YarrOp& op = m_ops[opIndex];
1576         PatternTerm* term = op.m_term;
1577         UChar32 ch = term->patternCharacter;
1578
1579         const RegisterID character = regT0;
1580         const RegisterID countRegister = regT1;
1581
1582         if (m_decodeSurrogatePairs)
1583             op.m_jumps.append(jumpIfNoAvailableInput());
1584
1585         move(index, countRegister);
1586         Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1587         scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
1588         sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1589
1590         Label loop(this);
1591         readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1592         // For case-insesitive compares, non-ascii characters that have different
1593         // upper & lower case representations are converted to a character class.
1594         ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1595         if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1596             or32(TrustedImm32(0x20), character);
1597             ch |= 0x20;
1598         }
1599
1600         op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
1601 #ifdef JIT_UNICODE_EXPRESSIONS
1602         if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
1603             add32(TrustedImm32(2), countRegister);
1604         else
1605 #endif
1606             add32(TrustedImm32(1), countRegister);
1607         branch32(NotEqual, countRegister, index).linkTo(loop, this);
1608     }
1609     void backtrackPatternCharacterFixed(size_t opIndex)
1610     {
1611         backtrackTermDefault(opIndex);
1612     }
1613
1614     void generatePatternCharacterGreedy(size_t opIndex)
1615     {
1616         YarrOp& op = m_ops[opIndex];
1617         PatternTerm* term = op.m_term;
1618         UChar32 ch = term->patternCharacter;
1619
1620         const RegisterID character = regT0;
1621         const RegisterID countRegister = regT1;
1622
1623         move(TrustedImm32(0), countRegister);
1624
1625         // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1626         if (!(!isLatin1(ch) && (m_charSize == Char8))) {
1627             JumpList failures;
1628             Label loop(this);
1629             failures.append(atEndOfInput());
1630             failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1631
1632             add32(TrustedImm32(1), index);
1633 #ifdef JIT_UNICODE_EXPRESSIONS
1634             if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1635                 Jump surrogatePairOk = notAtEndOfInput();
1636                 sub32(TrustedImm32(1), index);
1637                 failures.append(jump());
1638                 surrogatePairOk.link(this);
1639                 add32(TrustedImm32(1), index);
1640             }
1641 #endif
1642             add32(TrustedImm32(1), countRegister);
1643
1644             if (term->quantityMaxCount == quantifyInfinite)
1645                 jump(loop);
1646             else
1647                 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1648
1649             failures.link(this);
1650         }
1651         op.m_reentry = label();
1652
1653         storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1654     }
1655     void backtrackPatternCharacterGreedy(size_t opIndex)
1656     {
1657         YarrOp& op = m_ops[opIndex];
1658         PatternTerm* term = op.m_term;
1659
1660         const RegisterID countRegister = regT1;
1661
1662         m_backtrackingState.link(this);
1663
1664         loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1665         m_backtrackingState.append(branchTest32(Zero, countRegister));
1666         sub32(TrustedImm32(1), countRegister);
1667         if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter))
1668             sub32(TrustedImm32(1), index);
1669         else
1670             sub32(TrustedImm32(2), index);
1671         jump(op.m_reentry);
1672     }
1673
1674     void generatePatternCharacterNonGreedy(size_t opIndex)
1675     {
1676         YarrOp& op = m_ops[opIndex];
1677         PatternTerm* term = op.m_term;
1678
1679         const RegisterID countRegister = regT1;
1680
1681         move(TrustedImm32(0), countRegister);
1682         op.m_reentry = label();
1683         storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1684     }
1685     void backtrackPatternCharacterNonGreedy(size_t opIndex)
1686     {
1687         YarrOp& op = m_ops[opIndex];
1688         PatternTerm* term = op.m_term;
1689         UChar32 ch = term->patternCharacter;
1690
1691         const RegisterID character = regT0;
1692         const RegisterID countRegister = regT1;
1693
1694         m_backtrackingState.link(this);
1695
1696         loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1697
1698         // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1699         if (!(!isLatin1(ch) && (m_charSize == Char8))) {
1700             JumpList nonGreedyFailures;
1701             nonGreedyFailures.append(atEndOfInput());
1702             if (term->quantityMaxCount != quantifyInfinite)
1703                 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1704             nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1705
1706             add32(TrustedImm32(1), index);
1707 #ifdef JIT_UNICODE_EXPRESSIONS
1708             if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1709                 Jump surrogatePairOk = notAtEndOfInput();
1710                 sub32(TrustedImm32(1), index);
1711                 nonGreedyFailures.append(jump());
1712                 surrogatePairOk.link(this);
1713                 add32(TrustedImm32(1), index);
1714             }
1715 #endif
1716             add32(TrustedImm32(1), countRegister);
1717
1718             jump(op.m_reentry);
1719             nonGreedyFailures.link(this);
1720         }
1721
1722         if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1723             // subtract countRegister*2 for non-BMP characters
1724             lshift32(TrustedImm32(1), countRegister);
1725         }
1726
1727         sub32(countRegister, index);
1728         m_backtrackingState.fallthrough();
1729     }
1730
1731     void generateCharacterClassOnce(size_t opIndex)
1732     {
1733         YarrOp& op = m_ops[opIndex];
1734         PatternTerm* term = op.m_term;
1735
1736         const RegisterID character = regT0;
1737
1738         if (m_decodeSurrogatePairs) {
1739             op.m_jumps.append(jumpIfNoAvailableInput());
1740             storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1741         }
1742
1743         JumpList matchDest;
1744         readCharacter(m_checkedOffset - term->inputPosition, character);
1745         // If we are matching the "any character" builtin class we only need to read the
1746         // character and don't need to match as it will always succeed.
1747         if (term->invert() || !term->characterClass->m_anyCharacter) {
1748             matchCharacterClass(character, matchDest, term->characterClass);
1749
1750             if (term->invert())
1751                 op.m_jumps.append(matchDest);
1752             else {
1753                 op.m_jumps.append(jump());
1754                 matchDest.link(this);
1755             }
1756         }
1757 #ifdef JIT_UNICODE_EXPRESSIONS
1758         if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert())) {
1759             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1760             add32(TrustedImm32(1), index);
1761             isBMPChar.link(this);
1762         }
1763 #endif
1764     }
1765     void backtrackCharacterClassOnce(size_t opIndex)
1766     {
1767 #ifdef JIT_UNICODE_EXPRESSIONS
1768         if (m_decodeSurrogatePairs) {
1769             YarrOp& op = m_ops[opIndex];
1770             PatternTerm* term = op.m_term;
1771
1772             m_backtrackingState.link(this);
1773             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1774             m_backtrackingState.fallthrough();
1775         }
1776 #endif
1777         backtrackTermDefault(opIndex);
1778     }
1779
1780     void generateCharacterClassFixed(size_t opIndex)
1781     {
1782         YarrOp& op = m_ops[opIndex];
1783         PatternTerm* term = op.m_term;
1784
1785         const RegisterID character = regT0;
1786         const RegisterID countRegister = regT1;
1787
1788         if (m_decodeSurrogatePairs)
1789             op.m_jumps.append(jumpIfNoAvailableInput());
1790
1791         move(index, countRegister);
1792
1793         Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1794
1795 #ifdef JIT_UNICODE_EXPRESSIONS
1796         if (m_decodeSurrogatePairs && term->characterClass->hasOnlyNonBMPCharacters() && !term->invert())
1797             scaledMaxCount *= 2;
1798 #endif
1799         sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1800
1801         Label loop(this);
1802         JumpList matchDest;
1803         readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1804         // If we are matching the "any character" builtin class we only need to read the
1805         // character and don't need to match as it will always succeed.
1806         if (term->invert() || !term->characterClass->m_anyCharacter) {
1807             matchCharacterClass(character, matchDest, term->characterClass);
1808
1809             if (term->invert())
1810                 op.m_jumps.append(matchDest);
1811             else {
1812                 op.m_jumps.append(jump());
1813                 matchDest.link(this);
1814             }
1815         }
1816
1817 #ifdef JIT_UNICODE_EXPRESSIONS
1818         if (m_decodeSurrogatePairs) {
1819             if (term->characterClass->hasOneCharacterSize() && !term->invert())
1820                 add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), countRegister);
1821             else {
1822                 add32(TrustedImm32(1), countRegister);
1823                 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1824                 op.m_jumps.append(atEndOfInput());
1825                 add32(TrustedImm32(1), countRegister);
1826                 add32(TrustedImm32(1), index);
1827                 isBMPChar.link(this);
1828             }
1829         } else
1830 #endif
1831             add32(TrustedImm32(1), countRegister);
1832         branch32(NotEqual, countRegister, index).linkTo(loop, this);
1833     }
1834     void backtrackCharacterClassFixed(size_t opIndex)
1835     {
1836         backtrackTermDefault(opIndex);
1837     }
1838
1839     void generateCharacterClassGreedy(size_t opIndex)
1840     {
1841         YarrOp& op = m_ops[opIndex];
1842         PatternTerm* term = op.m_term;
1843
1844         const RegisterID character = regT0;
1845         const RegisterID countRegister = regT1;
1846
1847         if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert()))
1848             storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1849         move(TrustedImm32(0), countRegister);
1850
1851         JumpList failures;
1852         Label loop(this);
1853 #ifdef JIT_UNICODE_EXPRESSIONS
1854         if (term->characterClass->hasOneCharacterSize() && !term->invert() && term->characterClass->hasNonBMPCharacters()) {
1855             move(TrustedImm32(1), character);
1856             failures.append(checkNotEnoughInput(character));
1857         } else
1858 #endif
1859             failures.append(atEndOfInput());
1860
1861         if (term->invert()) {
1862             readCharacter(m_checkedOffset - term->inputPosition, character);
1863             matchCharacterClass(character, failures, term->characterClass);
1864         } else {
1865             JumpList matchDest;
1866             readCharacter(m_checkedOffset - term->inputPosition, character);
1867             // If we are matching the "any character" builtin class for non-unicode patterns,
1868             // we only need to read the character and don't need to match as it will always succeed.
1869             if (!term->characterClass->m_anyCharacter) {
1870                 matchCharacterClass(character, matchDest, term->characterClass);
1871                 failures.append(jump());
1872             }
1873             matchDest.link(this);
1874         }
1875
1876 #ifdef JIT_UNICODE_EXPRESSIONS
1877         if (m_decodeSurrogatePairs)
1878             advanceIndexAfterCharacterClassTermMatch(term, failures, character);
1879         else
1880 #endif
1881             add32(TrustedImm32(1), index);
1882         add32(TrustedImm32(1), countRegister);
1883
1884         if (term->quantityMaxCount != quantifyInfinite) {
1885             branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1886             failures.append(jump());
1887         } else
1888             jump(loop);
1889
1890         failures.link(this);
1891         op.m_reentry = label();
1892
1893         storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1894     }
1895     void backtrackCharacterClassGreedy(size_t opIndex)
1896     {
1897         YarrOp& op = m_ops[opIndex];
1898         PatternTerm* term = op.m_term;
1899
1900         const RegisterID countRegister = regT1;
1901
1902         m_backtrackingState.link(this);
1903
1904         loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1905         m_backtrackingState.append(branchTest32(Zero, countRegister));
1906         sub32(TrustedImm32(1), countRegister);
1907         storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1908
1909         if (!m_decodeSurrogatePairs)
1910             sub32(TrustedImm32(1), index);
1911         else if (term->characterClass->hasOneCharacterSize() && !term->invert())
1912             sub32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
1913         else {
1914             // Rematch one less
1915             const RegisterID character = regT0;
1916
1917             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1918
1919             Label rematchLoop(this);
1920             readCharacter(m_checkedOffset - term->inputPosition, character);
1921
1922             sub32(TrustedImm32(1), countRegister);
1923             add32(TrustedImm32(1), index);
1924
1925 #ifdef JIT_UNICODE_EXPRESSIONS
1926             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1927             add32(TrustedImm32(1), index);
1928             isBMPChar.link(this);
1929 #endif
1930
1931             branchTest32(Zero, countRegister).linkTo(rematchLoop, this);
1932
1933             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1934         }
1935         jump(op.m_reentry);
1936     }
1937
1938     void generateCharacterClassNonGreedy(size_t opIndex)
1939     {
1940         YarrOp& op = m_ops[opIndex];
1941         PatternTerm* term = op.m_term;
1942
1943         const RegisterID countRegister = regT1;
1944
1945         move(TrustedImm32(0), countRegister);
1946         op.m_reentry = label();
1947
1948 #ifdef JIT_UNICODE_EXPRESSIONS
1949         if (m_decodeSurrogatePairs) {
1950             if (!term->characterClass->hasOneCharacterSize() || term->invert())
1951                 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1952         }
1953 #endif
1954
1955         storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1956     }
1957
1958     void backtrackCharacterClassNonGreedy(size_t opIndex)
1959     {
1960         YarrOp& op = m_ops[opIndex];
1961         PatternTerm* term = op.m_term;
1962
1963         const RegisterID character = regT0;
1964         const RegisterID countRegister = regT1;
1965
1966         JumpList nonGreedyFailures;
1967
1968         m_backtrackingState.link(this);
1969
1970 #ifdef JIT_UNICODE_EXPRESSIONS
1971         if (m_decodeSurrogatePairs) {
1972             if (!term->characterClass->hasOneCharacterSize() || term->invert())
1973                 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1974         }
1975 #endif
1976
1977         loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1978
1979         nonGreedyFailures.append(atEndOfInput());
1980         nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1981
1982         JumpList matchDest;
1983         readCharacter(m_checkedOffset - term->inputPosition, character);
1984         // If we are matching the "any character" builtin class for non-unicode patterns,
1985         // we only need to read the character and don't need to match as it will always succeed.
1986         if (term->invert() || !term->characterClass->m_anyCharacter) {
1987             matchCharacterClass(character, matchDest, term->characterClass);
1988
1989             if (term->invert())
1990                 nonGreedyFailures.append(matchDest);
1991             else {
1992                 nonGreedyFailures.append(jump());
1993                 matchDest.link(this);
1994             }
1995         }
1996
1997 #ifdef JIT_UNICODE_EXPRESSIONS
1998         if (m_decodeSurrogatePairs)
1999             advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailures, character);
2000         else
2001 #endif
2002             add32(TrustedImm32(1), index);
2003         add32(TrustedImm32(1), countRegister);
2004
2005         jump(op.m_reentry);
2006
2007         nonGreedyFailures.link(this);
2008         sub32(countRegister, index);
2009         m_backtrackingState.fallthrough();
2010     }
2011
2012     void generateDotStarEnclosure(size_t opIndex)
2013     {
2014         YarrOp& op = m_ops[opIndex];
2015         PatternTerm* term = op.m_term;
2016
2017         const RegisterID character = regT0;
2018         const RegisterID matchPos = regT1;
2019 #ifndef HAVE_INITIAL_START_REG
2020         const RegisterID initialStart = character;
2021 #endif
2022
2023         JumpList foundBeginningNewLine;
2024         JumpList saveStartIndex;
2025         JumpList foundEndingNewLine;
2026
2027         if (m_pattern.dotAll()) {
2028             move(TrustedImm32(0), matchPos);
2029             setMatchStart(matchPos);
2030             move(length, index);
2031             return;
2032         }
2033
2034         ASSERT(!m_pattern.m_body->m_hasFixedSize);
2035         getMatchStart(matchPos);
2036
2037 #ifndef HAVE_INITIAL_START_REG
2038         loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2039 #endif
2040         saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
2041         Label findBOLLoop(this);
2042         sub32(TrustedImm32(1), matchPos);
2043         if (m_charSize == Char8)
2044             load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2045         else
2046             load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
2047         matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
2048
2049 #ifndef HAVE_INITIAL_START_REG
2050         loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2051 #endif
2052         branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
2053         saveStartIndex.append(jump());
2054
2055         foundBeginningNewLine.link(this);
2056         add32(TrustedImm32(1), matchPos); // Advance past newline
2057         saveStartIndex.link(this);
2058
2059         if (!m_pattern.multiline() && term->anchors.bolAnchor)
2060             op.m_jumps.append(branchTest32(NonZero, matchPos));
2061
2062         ASSERT(!m_pattern.m_body->m_hasFixedSize);
2063         setMatchStart(matchPos);
2064
2065         move(index, matchPos);
2066
2067         Label findEOLLoop(this);        
2068         foundEndingNewLine.append(branch32(Equal, matchPos, length));
2069         if (m_charSize == Char8)
2070             load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2071         else
2072             load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
2073         matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
2074         add32(TrustedImm32(1), matchPos);
2075         jump(findEOLLoop);
2076
2077         foundEndingNewLine.link(this);
2078
2079         if (!m_pattern.multiline() && term->anchors.eolAnchor)
2080             op.m_jumps.append(branch32(NotEqual, matchPos, length));
2081
2082         move(matchPos, index);
2083     }
2084
2085     void backtrackDotStarEnclosure(size_t opIndex)
2086     {
2087         backtrackTermDefault(opIndex);
2088     }
2089     
2090     // Code generation/backtracking for simple terms
2091     // (pattern characters, character classes, and assertions).
2092     // These methods farm out work to the set of functions above.
2093     void generateTerm(size_t opIndex)
2094     {
2095         YarrOp& op = m_ops[opIndex];
2096         PatternTerm* term = op.m_term;
2097
2098         switch (term->type) {
2099         case PatternTerm::TypePatternCharacter:
2100             switch (term->quantityType) {
2101             case QuantifierFixedCount:
2102                 if (term->quantityMaxCount == 1)
2103                     generatePatternCharacterOnce(opIndex);
2104                 else
2105                     generatePatternCharacterFixed(opIndex);
2106                 break;
2107             case QuantifierGreedy:
2108                 generatePatternCharacterGreedy(opIndex);
2109                 break;
2110             case QuantifierNonGreedy:
2111                 generatePatternCharacterNonGreedy(opIndex);
2112                 break;
2113             }
2114             break;
2115
2116         case PatternTerm::TypeCharacterClass:
2117             switch (term->quantityType) {
2118             case QuantifierFixedCount:
2119                 if (term->quantityMaxCount == 1)
2120                     generateCharacterClassOnce(opIndex);
2121                 else
2122                     generateCharacterClassFixed(opIndex);
2123                 break;
2124             case QuantifierGreedy:
2125                 generateCharacterClassGreedy(opIndex);
2126                 break;
2127             case QuantifierNonGreedy:
2128                 generateCharacterClassNonGreedy(opIndex);
2129                 break;
2130             }
2131             break;
2132
2133         case PatternTerm::TypeAssertionBOL:
2134             generateAssertionBOL(opIndex);
2135             break;
2136
2137         case PatternTerm::TypeAssertionEOL:
2138             generateAssertionEOL(opIndex);
2139             break;
2140
2141         case PatternTerm::TypeAssertionWordBoundary:
2142             generateAssertionWordBoundary(opIndex);
2143             break;
2144
2145         case PatternTerm::TypeForwardReference:
2146             m_failureReason = JITFailureReason::ForwardReference;
2147             break;
2148
2149         case PatternTerm::TypeParenthesesSubpattern:
2150         case PatternTerm::TypeParentheticalAssertion:
2151             RELEASE_ASSERT_NOT_REACHED();
2152
2153         case PatternTerm::TypeBackReference:
2154 #if ENABLE(YARR_JIT_BACKREFERENCES)
2155             generateBackReference(opIndex);
2156 #else
2157             m_failureReason = JITFailureReason::BackReference;
2158 #endif
2159             break;
2160         case PatternTerm::TypeDotStarEnclosure:
2161             generateDotStarEnclosure(opIndex);
2162             break;
2163         }
2164     }
2165     void backtrackTerm(size_t opIndex)
2166     {
2167         YarrOp& op = m_ops[opIndex];
2168         PatternTerm* term = op.m_term;
2169
2170         switch (term->type) {
2171         case PatternTerm::TypePatternCharacter:
2172             switch (term->quantityType) {
2173             case QuantifierFixedCount:
2174                 if (term->quantityMaxCount == 1)
2175                     backtrackPatternCharacterOnce(opIndex);
2176                 else
2177                     backtrackPatternCharacterFixed(opIndex);
2178                 break;
2179             case QuantifierGreedy:
2180                 backtrackPatternCharacterGreedy(opIndex);
2181                 break;
2182             case QuantifierNonGreedy:
2183                 backtrackPatternCharacterNonGreedy(opIndex);
2184                 break;
2185             }
2186             break;
2187
2188         case PatternTerm::TypeCharacterClass:
2189             switch (term->quantityType) {
2190             case QuantifierFixedCount:
2191                 if (term->quantityMaxCount == 1)
2192                     backtrackCharacterClassOnce(opIndex);
2193                 else
2194                     backtrackCharacterClassFixed(opIndex);
2195                 break;
2196             case QuantifierGreedy:
2197                 backtrackCharacterClassGreedy(opIndex);
2198                 break;
2199             case QuantifierNonGreedy:
2200                 backtrackCharacterClassNonGreedy(opIndex);
2201                 break;
2202             }
2203             break;
2204
2205         case PatternTerm::TypeAssertionBOL:
2206             backtrackAssertionBOL(opIndex);
2207             break;
2208
2209         case PatternTerm::TypeAssertionEOL:
2210             backtrackAssertionEOL(opIndex);
2211             break;
2212
2213         case PatternTerm::TypeAssertionWordBoundary:
2214             backtrackAssertionWordBoundary(opIndex);
2215             break;
2216
2217         case PatternTerm::TypeForwardReference:
2218             m_failureReason = JITFailureReason::ForwardReference;
2219             break;
2220
2221         case PatternTerm::TypeParenthesesSubpattern:
2222         case PatternTerm::TypeParentheticalAssertion:
2223             RELEASE_ASSERT_NOT_REACHED();
2224
2225         case PatternTerm::TypeBackReference:
2226 #if ENABLE(YARR_JIT_BACKREFERENCES)
2227             backtrackBackReference(opIndex);
2228 #else
2229             m_failureReason = JITFailureReason::BackReference;
2230 #endif
2231             break;
2232
2233         case PatternTerm::TypeDotStarEnclosure:
2234             backtrackDotStarEnclosure(opIndex);
2235             break;
2236         }
2237     }
2238
2239     void generate()
2240     {
2241         // Forwards generate the matching code.
2242         ASSERT(m_ops.size());
2243         size_t opIndex = 0;
2244
2245         do {
2246             if (m_disassembler)
2247                 m_disassembler->setForGenerate(opIndex, label());
2248
2249             YarrOp& op = m_ops[opIndex];
2250             switch (op.m_op) {
2251
2252             case OpTerm:
2253                 generateTerm(opIndex);
2254                 break;
2255
2256             // OpBodyAlternativeBegin/Next/End
2257             //
2258             // These nodes wrap the set of alternatives in the body of the regular expression.
2259             // There may be either one or two chains of OpBodyAlternative nodes, one representing
2260             // the 'once through' sequence of alternatives (if any exist), and one representing
2261             // the repeating alternatives (again, if any exist).
2262             //
2263             // Upon normal entry to the Begin alternative, we will check that input is available.
2264             // Reentry to the Begin alternative will take place after the check has taken place,
2265             // and will assume that the input position has already been progressed as appropriate.
2266             //
2267             // Entry to subsequent Next/End alternatives occurs when the prior alternative has
2268             // successfully completed a match - return a success state from JIT code.
2269             //
2270             // Next alternatives allow for reentry optimized to suit backtracking from its
2271             // preceding alternative. It expects the input position to still be set to a position
2272             // appropriate to its predecessor, and it will only perform an input check if the
2273             // predecessor had a minimum size less than its own.
2274             //
2275             // In the case 'once through' expressions, the End node will also have a reentry
2276             // point to jump to when the last alternative fails. Again, this expects the input
2277             // position to still reflect that expected by the prior alternative.
2278             case OpBodyAlternativeBegin: {
2279                 PatternAlternative* alternative = op.m_alternative;
2280
2281                 // Upon entry at the head of the set of alternatives, check if input is available
2282                 // to run the first alternative. (This progresses the input position).
2283                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
2284                 // We will reenter after the check, and assume the input position to have been
2285                 // set as appropriate to this alternative.
2286                 op.m_reentry = label();
2287
2288                 m_checkedOffset += alternative->m_minimumSize;
2289                 break;
2290             }
2291             case OpBodyAlternativeNext:
2292             case OpBodyAlternativeEnd: {
2293                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2294                 PatternAlternative* alternative = op.m_alternative;
2295
2296                 // If we get here, the prior alternative matched - return success.
2297                 
2298                 // Adjust the stack pointer to remove the pattern's frame.
2299                 removeCallFrame();
2300
2301                 // Load appropriate values into the return register and the first output
2302                 // slot, and return. In the case of pattern with a fixed size, we will
2303                 // not have yet set the value in the first 
2304                 ASSERT(index != returnRegister);
2305                 if (m_pattern.m_body->m_hasFixedSize) {
2306                     move(index, returnRegister);
2307                     if (priorAlternative->m_minimumSize)
2308                         sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
2309                     if (compileMode == IncludeSubpatterns)
2310                         store32(returnRegister, output);
2311                 } else
2312                     getMatchStart(returnRegister);
2313                 if (compileMode == IncludeSubpatterns)
2314                     store32(index, Address(output, 4));
2315                 move(index, returnRegister2);
2316
2317                 generateReturn();
2318
2319                 // This is the divide between the tail of the prior alternative, above, and
2320                 // the head of the subsequent alternative, below.
2321
2322                 if (op.m_op == OpBodyAlternativeNext) {
2323                     // This is the reentry point for the Next alternative. We expect any code
2324                     // that jumps here to do so with the input position matching that of the
2325                     // PRIOR alteranative, and we will only check input availability if we
2326                     // need to progress it forwards.
2327                     op.m_reentry = label();
2328                     if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
2329                         add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
2330                         op.m_jumps.append(jumpIfNoAvailableInput());
2331                     } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
2332                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
2333                 } else if (op.m_nextOp == notFound) {
2334                     // This is the reentry point for the End of 'once through' alternatives,
2335                     // jumped to when the last alternative fails to match.
2336                     op.m_reentry = label();
2337                     sub32(Imm32(priorAlternative->m_minimumSize), index);
2338                 }
2339
2340                 if (op.m_op == OpBodyAlternativeNext)
2341                     m_checkedOffset += alternative->m_minimumSize;
2342                 m_checkedOffset -= priorAlternative->m_minimumSize;
2343                 break;
2344             }
2345
2346             // OpSimpleNestedAlternativeBegin/Next/End
2347             // OpNestedAlternativeBegin/Next/End
2348             //
2349             // These nodes are used to handle sets of alternatives that are nested within
2350             // subpatterns and parenthetical assertions. The 'simple' forms are used where
2351             // we do not need to be able to backtrack back into any alternative other than
2352             // the last, the normal forms allow backtracking into any alternative.
2353             //
2354             // Each Begin/Next node is responsible for planting an input check to ensure
2355             // sufficient input is available on entry. Next nodes additionally need to
2356             // jump to the end - Next nodes use the End node's m_jumps list to hold this
2357             // set of jumps.
2358             //
2359             // In the non-simple forms, successful alternative matches must store a
2360             // 'return address' using a DataLabelPtr, used to store the address to jump
2361             // to when backtracking, to get to the code for the appropriate alternative.
2362             case OpSimpleNestedAlternativeBegin:
2363             case OpNestedAlternativeBegin: {
2364                 PatternTerm* term = op.m_term;
2365                 PatternAlternative* alternative = op.m_alternative;
2366                 PatternDisjunction* disjunction = term->parentheses.disjunction;
2367
2368                 // Calculate how much input we need to check for, and if non-zero check.
2369                 op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
2370                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2371                     op.m_checkAdjust -= disjunction->m_minimumSize;
2372                 if (op.m_checkAdjust)
2373                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2374
2375                 m_checkedOffset += op.m_checkAdjust;
2376                 break;
2377             }
2378             case OpSimpleNestedAlternativeNext:
2379             case OpNestedAlternativeNext: {
2380                 PatternTerm* term = op.m_term;
2381                 PatternAlternative* alternative = op.m_alternative;
2382                 PatternDisjunction* disjunction = term->parentheses.disjunction;
2383
2384                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2385                 if (op.m_op == OpNestedAlternativeNext) {
2386                     unsigned parenthesesFrameLocation = term->frameLocation;
2387                     op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2388                 }
2389
2390                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2391                     // If the previous alternative matched without consuming characters then
2392                     // backtrack to try to match while consumming some input.
2393                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2394                 }
2395
2396                 // If we reach here then the last alternative has matched - jump to the
2397                 // End node, to skip over any further alternatives.
2398                 //
2399                 // FIXME: this is logically O(N^2) (though N can be expected to be very
2400                 // small). We could avoid this either by adding an extra jump to the JIT
2401                 // data structures, or by making backtracking code that jumps to Next
2402                 // alternatives are responsible for checking that input is available (if
2403                 // we didn't need to plant the input checks, then m_jumps would be free).
2404                 YarrOp* endOp = &m_ops[op.m_nextOp];
2405                 while (endOp->m_nextOp != notFound) {
2406                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2407                     endOp = &m_ops[endOp->m_nextOp];
2408                 }
2409                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2410                 endOp->m_jumps.append(jump());
2411
2412                 // This is the entry point for the next alternative.
2413                 op.m_reentry = label();
2414
2415                 // Calculate how much input we need to check for, and if non-zero check.
2416                 op.m_checkAdjust = alternative->m_minimumSize;
2417                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2418                     op.m_checkAdjust -= disjunction->m_minimumSize;
2419                 if (op.m_checkAdjust)
2420                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2421
2422                 YarrOp& lastOp = m_ops[op.m_previousOp];
2423                 m_checkedOffset -= lastOp.m_checkAdjust;
2424                 m_checkedOffset += op.m_checkAdjust;
2425                 break;
2426             }
2427             case OpSimpleNestedAlternativeEnd:
2428             case OpNestedAlternativeEnd: {
2429                 PatternTerm* term = op.m_term;
2430
2431                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2432                 if (op.m_op == OpNestedAlternativeEnd) {
2433                     unsigned parenthesesFrameLocation = term->frameLocation;
2434                     op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2435                 }
2436
2437                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2438                     // If the previous alternative matched without consuming characters then
2439                     // backtrack to try to match while consumming some input.
2440                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2441                 }
2442
2443                 // If this set of alternatives contains more than one alternative,
2444                 // then the Next nodes will have planted jumps to the End, and added
2445                 // them to this node's m_jumps list.
2446                 op.m_jumps.link(this);
2447                 op.m_jumps.clear();
2448
2449                 YarrOp& lastOp = m_ops[op.m_previousOp];
2450                 m_checkedOffset -= lastOp.m_checkAdjust;
2451                 break;
2452             }
2453
2454             // OpParenthesesSubpatternOnceBegin/End
2455             //
2456             // These nodes support (optionally) capturing subpatterns, that have a
2457             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 
2458             case OpParenthesesSubpatternOnceBegin: {
2459                 PatternTerm* term = op.m_term;
2460                 unsigned parenthesesFrameLocation = term->frameLocation;
2461                 const RegisterID indexTemporary = regT0;
2462                 ASSERT(term->quantityMaxCount == 1);
2463
2464                 // Upon entry to a Greedy quantified set of parenthese store the index.
2465                 // We'll use this for two purposes:
2466                 //  - To indicate which iteration we are on of mathing the remainder of
2467                 //    the expression after the parentheses - the first, including the
2468                 //    match within the parentheses, or the second having skipped over them.
2469                 //  - To check for empty matches, which must be rejected.
2470                 //
2471                 // At the head of a NonGreedy set of parentheses we'll immediately set the
2472                 // value on the stack to -1 (indicating a match skipping the subpattern),
2473                 // and plant a jump to the end. We'll also plant a label to backtrack to
2474                 // to reenter the subpattern later, with a store to set up index on the
2475                 // second iteration.
2476                 //
2477                 // FIXME: for capturing parens, could use the index in the capture array?
2478                 if (term->quantityType == QuantifierGreedy)
2479                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2480                 else if (term->quantityType == QuantifierNonGreedy) {
2481                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2482                     op.m_jumps.append(jump());
2483                     op.m_reentry = label();
2484                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2485                 }
2486
2487                 // If the parenthese are capturing, store the starting index value to the
2488                 // captures array, offsetting as necessary.
2489                 //
2490                 // FIXME: could avoid offsetting this value in JIT code, apply
2491                 // offsets only afterwards, at the point the results array is
2492                 // being accessed.
2493                 if (term->capture() && compileMode == IncludeSubpatterns) {
2494                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2495                     if (term->quantityType == QuantifierFixedCount)
2496                         inputOffset += term->parentheses.disjunction->m_minimumSize;
2497                     if (inputOffset) {
2498                         move(index, indexTemporary);
2499                         sub32(Imm32(inputOffset), indexTemporary);
2500                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2501                     } else
2502                         setSubpatternStart(index, term->parentheses.subpatternId);
2503                 }
2504                 break;
2505             }
2506             case OpParenthesesSubpatternOnceEnd: {
2507                 PatternTerm* term = op.m_term;
2508                 const RegisterID indexTemporary = regT0;
2509                 ASSERT(term->quantityMaxCount == 1);
2510
2511                 // Runtime ASSERT to make sure that the nested alternative handled the
2512                 // "no input consumed" check.
2513                 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2514                     Jump pastBreakpoint;
2515                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2516                     abortWithReason(YARRNoInputConsumed);
2517                     pastBreakpoint.link(this);
2518                 }
2519
2520                 // If the parenthese are capturing, store the ending index value to the
2521                 // captures array, offsetting as necessary.
2522                 //
2523                 // FIXME: could avoid offsetting this value in JIT code, apply
2524                 // offsets only afterwards, at the point the results array is
2525                 // being accessed.
2526                 if (term->capture() && compileMode == IncludeSubpatterns) {
2527                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2528                     if (inputOffset) {
2529                         move(index, indexTemporary);
2530                         sub32(Imm32(inputOffset), indexTemporary);
2531                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2532                     } else
2533                         setSubpatternEnd(index, term->parentheses.subpatternId);
2534                 }
2535
2536                 // If the parentheses are quantified Greedy then add a label to jump back
2537                 // to if we get a failed match from after the parentheses. For NonGreedy
2538                 // parentheses, link the jump from before the subpattern to here.
2539                 if (term->quantityType == QuantifierGreedy)
2540                     op.m_reentry = label();
2541                 else if (term->quantityType == QuantifierNonGreedy) {
2542                     YarrOp& beginOp = m_ops[op.m_previousOp];
2543                     beginOp.m_jumps.link(this);
2544                 }
2545                 break;
2546             }
2547
2548             // OpParenthesesSubpatternTerminalBegin/End
2549             case OpParenthesesSubpatternTerminalBegin: {
2550                 PatternTerm* term = op.m_term;
2551                 ASSERT(term->quantityType == QuantifierGreedy);
2552                 ASSERT(term->quantityMaxCount == quantifyInfinite);
2553                 ASSERT(!term->capture());
2554
2555                 // Upon entry set a label to loop back to.
2556                 op.m_reentry = label();
2557
2558                 // Store the start index of the current match; we need to reject zero
2559                 // length matches.
2560                 storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2561                 break;
2562             }
2563             case OpParenthesesSubpatternTerminalEnd: {
2564                 YarrOp& beginOp = m_ops[op.m_previousOp];
2565                 if (!ASSERT_DISABLED) {
2566                     PatternTerm* term = op.m_term;
2567                     
2568                     // Runtime ASSERT to make sure that the nested alternative handled the
2569                     // "no input consumed" check.
2570                     Jump pastBreakpoint;
2571                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2572                     abortWithReason(YARRNoInputConsumed);
2573                     pastBreakpoint.link(this);
2574                 }
2575
2576                 // We know that the match is non-zero, we can accept it and
2577                 // loop back up to the head of the subpattern.
2578                 jump(beginOp.m_reentry);
2579
2580                 // This is the entry point to jump to when we stop matching - we will
2581                 // do so once the subpattern cannot match any more.
2582                 op.m_reentry = label();
2583                 break;
2584             }
2585
2586             // OpParenthesesSubpatternBegin/End
2587             //
2588             // These nodes support generic subpatterns.
2589             case OpParenthesesSubpatternBegin: {
2590 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2591                 PatternTerm* term = op.m_term;
2592                 unsigned parenthesesFrameLocation = term->frameLocation;
2593
2594                 // Upon entry to a Greedy quantified set of parenthese store the index.
2595                 // We'll use this for two purposes:
2596                 //  - To indicate which iteration we are on of mathing the remainder of
2597                 //    the expression after the parentheses - the first, including the
2598                 //    match within the parentheses, or the second having skipped over them.
2599                 //  - To check for empty matches, which must be rejected.
2600                 //
2601                 // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
2602                 // in the backtrack info to -1 (indicating a match skipping the subpattern),
2603                 // and plant a jump to the end. We'll also plant a label to backtrack to
2604                 // to reenter the subpattern later, with a store to set 'begin' to current index
2605                 // on the second iteration.
2606                 //
2607                 // FIXME: for capturing parens, could use the index in the capture array?
2608                 if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
2609                     storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2610                     storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2611
2612                     if (term->quantityType == QuantifierNonGreedy) {
2613                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2614                         op.m_jumps.append(jump());
2615                     }
2616                     
2617                     op.m_reentry = label();
2618                     RegisterID currParenContextReg = regT0;
2619                     RegisterID newParenContextReg = regT1;
2620
2621                     loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
2622                     allocateParenContext(newParenContextReg);
2623                     storePtr(currParenContextReg, newParenContextReg);
2624                     storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2625                     saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
2626                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2627                 }
2628
2629                 // If the parenthese are capturing, store the starting index value to the
2630                 // captures array, offsetting as necessary.
2631                 //
2632                 // FIXME: could avoid offsetting this value in JIT code, apply
2633                 // offsets only afterwards, at the point the results array is
2634                 // being accessed.
2635                 if (term->capture() && compileMode == IncludeSubpatterns) {
2636                     const RegisterID indexTemporary = regT0;
2637                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2638                     if (term->quantityType == QuantifierFixedCount)
2639                         inputOffset += term->parentheses.disjunction->m_minimumSize;
2640                     if (inputOffset) {
2641                         move(index, indexTemporary);
2642                         sub32(Imm32(inputOffset), indexTemporary);
2643                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2644                     } else
2645                         setSubpatternStart(index, term->parentheses.subpatternId);
2646                 }
2647 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2648                 RELEASE_ASSERT_NOT_REACHED();
2649 #endif
2650                 break;
2651             }
2652             case OpParenthesesSubpatternEnd: {
2653 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2654                 PatternTerm* term = op.m_term;
2655                 unsigned parenthesesFrameLocation = term->frameLocation;
2656
2657                 // Runtime ASSERT to make sure that the nested alternative handled the
2658                 // "no input consumed" check.
2659                 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2660                     Jump pastBreakpoint;
2661                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
2662                     abortWithReason(YARRNoInputConsumed);
2663                     pastBreakpoint.link(this);
2664                 }
2665
2666                 const RegisterID countTemporary = regT1;
2667
2668                 YarrOp& beginOp = m_ops[op.m_previousOp];
2669                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2670                 add32(TrustedImm32(1), countTemporary);
2671                 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2672
2673                 // If the parenthese are capturing, store the ending index value to the
2674                 // captures array, offsetting as necessary.
2675                 //
2676                 // FIXME: could avoid offsetting this value in JIT code, apply
2677                 // offsets only afterwards, at the point the results array is
2678                 // being accessed.
2679                 if (term->capture() && compileMode == IncludeSubpatterns) {
2680                     const RegisterID indexTemporary = regT0;
2681                     
2682                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2683                     if (inputOffset) {
2684                         move(index, indexTemporary);
2685                         sub32(Imm32(inputOffset), indexTemporary);
2686                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2687                     } else
2688                         setSubpatternEnd(index, term->parentheses.subpatternId);
2689                 }
2690
2691                 // If the parentheses are quantified Greedy then add a label to jump back
2692                 // to if we get a failed match from after the parentheses. For NonGreedy
2693                 // parentheses, link the jump from before the subpattern to here.
2694                 if (term->quantityType == QuantifierGreedy) {
2695                     if (term->quantityMaxCount != quantifyInfinite)
2696                         branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2697                     else
2698                         jump(beginOp.m_reentry);
2699                     
2700                     op.m_reentry = label();
2701                 } else if (term->quantityType == QuantifierNonGreedy) {
2702                     YarrOp& beginOp = m_ops[op.m_previousOp];
2703                     beginOp.m_jumps.link(this);
2704                     op.m_reentry = label();
2705                 }
2706 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2707                 RELEASE_ASSERT_NOT_REACHED();
2708 #endif
2709                 break;
2710             }
2711
2712             // OpParentheticalAssertionBegin/End
2713             case OpParentheticalAssertionBegin: {
2714                 PatternTerm* term = op.m_term;
2715
2716                 // Store the current index - assertions should not update index, so
2717                 // we will need to restore it upon a successful match.
2718                 unsigned parenthesesFrameLocation = term->frameLocation;
2719                 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2720
2721                 // Check 
2722                 op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2723                 if (op.m_checkAdjust)
2724                     sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2725
2726                 m_checkedOffset -= op.m_checkAdjust;
2727                 break;
2728             }
2729             case OpParentheticalAssertionEnd: {
2730                 PatternTerm* term = op.m_term;
2731
2732                 // Restore the input index value.
2733                 unsigned parenthesesFrameLocation = term->frameLocation;
2734                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index);
2735
2736                 // If inverted, a successful match of the assertion must be treated
2737                 // as a failure, so jump to backtracking.
2738                 if (term->invert()) {
2739                     op.m_jumps.append(jump());
2740                     op.m_reentry = label();
2741                 }
2742
2743                 YarrOp& lastOp = m_ops[op.m_previousOp];
2744                 m_checkedOffset += lastOp.m_checkAdjust;
2745                 break;
2746             }
2747
2748             case OpMatchFailed:
2749                 removeCallFrame();
2750                 generateFailReturn();
2751                 break;
2752             }
2753
2754             ++opIndex;
2755         } while (opIndex < m_ops.size());
2756     }
2757
2758     void backtrack()
2759     {
2760         // Backwards generate the backtracking code.
2761         size_t opIndex = m_ops.size();
2762         ASSERT(opIndex);
2763
2764         do {
2765             --opIndex;
2766
2767             if (m_disassembler)
2768                 m_disassembler->setForBacktrack(opIndex, label());
2769
2770             YarrOp& op = m_ops[opIndex];
2771             switch (op.m_op) {
2772
2773             case OpTerm:
2774                 backtrackTerm(opIndex);
2775                 break;
2776
2777             // OpBodyAlternativeBegin/Next/End
2778             //
2779             // For each Begin/Next node representing an alternative, we need to decide what to do
2780             // in two circumstances:
2781             //  - If we backtrack back into this node, from within the alternative.
2782             //  - If the input check at the head of the alternative fails (if this exists).
2783             //
2784             // We treat these two cases differently since in the former case we have slightly
2785             // more information - since we are backtracking out of a prior alternative we know
2786             // that at least enough input was available to run it. For example, given the regular
2787             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
2788             // character match of 'a'), then we need not perform an additional input availability
2789             // check before running the second alternative.
2790             //
2791             // Backtracking required differs for the last alternative, which in the case of the
2792             // repeating set of alternatives must loop. The code generated for the last alternative
2793             // will also be used to handle all input check failures from any prior alternatives -
2794             // these require similar functionality, in seeking the next available alternative for
2795             // which there is sufficient input.
2796             //
2797             // Since backtracking of all other alternatives simply requires us to link backtracks
2798             // to the reentry point for the subsequent alternative, we will only be generating any
2799             // code when backtracking the last alternative.
2800             case OpBodyAlternativeBegin:
2801             case OpBodyAlternativeNext: {
2802                 PatternAlternative* alternative = op.m_alternative;
2803
2804                 if (op.m_op == OpBodyAlternativeNext) {
2805                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2806                     m_checkedOffset += priorAlternative->m_minimumSize;
2807                 }
2808                 m_checkedOffset -= alternative->m_minimumSize;
2809
2810                 // Is this the last alternative? If not, then if we backtrack to this point we just
2811                 // need to jump to try to match the next alternative.
2812                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2813                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2814                     break;
2815                 }
2816                 YarrOp& endOp = m_ops[op.m_nextOp];
2817
2818                 YarrOp* beginOp = &op;
2819                 while (beginOp->m_op != OpBodyAlternativeBegin) {
2820                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2821                     beginOp = &m_ops[beginOp->m_previousOp];
2822                 }
2823
2824                 bool onceThrough = endOp.m_nextOp == notFound;
2825                 
2826                 JumpList lastStickyAlternativeFailures;
2827
2828                 // First, generate code to handle cases where we backtrack out of an attempted match
2829                 // of the last alternative. If this is a 'once through' set of alternatives then we
2830                 // have nothing to do - link this straight through to the End.
2831                 if (onceThrough)
2832                     m_backtrackingState.linkTo(endOp.m_reentry, this);
2833                 else {
2834                     // If we don't need to move the input poistion, and the pattern has a fixed size
2835                     // (in which case we omit the store of the start index until the pattern has matched)
2836                     // then we can just link the backtrack out of the last alternative straight to the
2837                     // head of the first alternative.
2838                     if (m_pattern.m_body->m_hasFixedSize
2839                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2840                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
2841                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
2842                     else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2843                         // It is a sticky pattern and the last alternative failed, jump to the end.
2844                         m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2845                     } else {
2846                         // We need to generate a trampoline of code to execute before looping back
2847                         // around to the first alternative.
2848                         m_backtrackingState.link(this);
2849
2850                         // No need to advance and retry for a sticky pattern.
2851                         if (!m_pattern.sticky()) {
2852                             // If the pattern size is not fixed, then store the start index for use if we match.
2853                             if (!m_pattern.m_body->m_hasFixedSize) {
2854                                 if (alternative->m_minimumSize == 1)
2855                                     setMatchStart(index);
2856                                 else {
2857                                     move(index, regT0);
2858                                     if (alternative->m_minimumSize)
2859                                         sub32(Imm32(alternative->m_minimumSize - 1), regT0);
2860                                     else
2861                                         add32(TrustedImm32(1), regT0);
2862                                     setMatchStart(regT0);
2863                                 }
2864                             }
2865
2866                             // Generate code to loop. Check whether the last alternative is longer than the
2867                             // first (e.g. /a|xy/ or /a|xyz/).
2868                             if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2869                                 // We want to loop, and increment input position. If the delta is 1, it is
2870                                 // already correctly incremented, if more than one then decrement as appropriate.
2871                                 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2872                                 ASSERT(delta);
2873                                 if (delta != 1)
2874                                     sub32(Imm32(delta - 1), index);
2875                                 jump(beginOp->m_reentry);
2876                             } else {
2877                                 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
2878                                 // be sufficent input available to handle this, so just fall through.
2879                                 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2880                                 if (delta != 0xFFFFFFFFu) {
2881                                     // We need to check input because we are incrementing the input.
2882                                     add32(Imm32(delta + 1), index);
2883                                     checkInput().linkTo(beginOp->m_reentry, this);
2884                                 }
2885                             }
2886                         }
2887                     }
2888                 }
2889
2890                 // We can reach this point in the code in two ways:
2891                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
2892                 //    last alternative, and did not have sufficent input to run the first).
2893                 //  - We will loop back up to the following label when a repeating alternative loops,
2894                 //    following a failed input check.
2895                 //
2896                 // Either way, we have just failed the input check for the first alternative.
2897                 Label firstInputCheckFailed(this);
2898
2899                 // Generate code to handle input check failures from alternatives except the last.
2900                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
2901                 // nextOp is the alternative we will be attempting to reenter into.
2902                 // 
2903                 // We will link input check failures from the forwards matching path back to the code
2904                 // that can handle them.
2905                 YarrOp* prevOp = beginOp;
2906                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
2907                 while (nextOp->m_op != OpBodyAlternativeEnd) {
2908                     prevOp->m_jumps.link(this);
2909
2910                     // We only get here if an input check fails, it is only worth checking again
2911                     // if the next alternative has a minimum size less than the last.
2912                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
2913                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
2914                         // subtract delta back out, and reduce this code. Should performance test
2915                         // the benefit of this.
2916                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
2917                         sub32(Imm32(delta), index);
2918                         Jump fail = jumpIfNoAvailableInput();
2919                         add32(Imm32(delta), index);
2920                         jump(nextOp->m_reentry);
2921                         fail.link(this);
2922                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
2923                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
2924                     prevOp = nextOp;
2925                     nextOp = &m_ops[nextOp->m_nextOp];
2926                 }
2927
2928                 // We fall through to here if there is insufficient input to run the last alternative.
2929
2930                 // If there is insufficient input to run the last alternative, then for 'once through'
2931                 // alternatives we are done - just jump back up into the forwards matching path at the End.
2932                 if (onceThrough) {
2933                     op.m_jumps.linkTo(endOp.m_reentry, this);
2934                     jump(endOp.m_reentry);
2935                     break;
2936                 }
2937
2938                 // For repeating alternatives, link any input check failure from the last alternative to
2939                 // this point.
2940                 op.m_jumps.link(this);
2941
2942                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
2943
2944                 // Check for cases where input position is already incremented by 1 for the last
2945                 // alternative (this is particularly useful where the minimum size of the body
2946                 // disjunction is 0, e.g. /a*|b/).
2947                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
2948                     // index is already incremented by 1, so just store it now!
2949                     setMatchStart(index);
2950                     needsToUpdateMatchStart = false;
2951                 }
2952
2953                 if (!m_pattern.sticky()) {
2954                     // Check whether there is sufficient input to loop. Increment the input position by
2955                     // one, and check. Also add in the minimum disjunction size before checking - there
2956                     // is no point in looping if we're just going to fail all the input checks around
2957                     // the next iteration.
2958                     ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
2959                     if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
2960                         // If the last alternative had the same minimum size as the disjunction,
2961                         // just simply increment input pos by 1, no adjustment based on minimum size.
2962                         add32(TrustedImm32(1), index);
2963                     } else {
2964                         // If the minumum for the last alternative was one greater than than that
2965                         // for the disjunction, we're already progressed by 1, nothing to do!
2966                         unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
2967                         if (delta)
2968                             sub32(Imm32(delta), index);
2969                     }
2970                     Jump matchFailed = jumpIfNoAvailableInput();
2971
2972                     if (needsToUpdateMatchStart) {
2973                         if (!m_pattern.m_body->m_minimumSize)
2974                             setMatchStart(index);
2975                         else {
2976                             move(index, regT0);
2977                             sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
2978                             setMatchStart(regT0);
2979                         }
2980                     }
2981
2982                     // Calculate how much more input the first alternative requires than the minimum
2983                     // for the body as a whole. If no more is needed then we dont need an additional
2984                     // input check here - jump straight back up to the start of the first alternative.
2985                     if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
2986                         jump(beginOp->m_reentry);
2987                     else {
2988                         if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
2989                             add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
2990                         else
2991                             sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
2992                         checkInput().linkTo(beginOp->m_reentry, this);
2993                         jump(firstInputCheckFailed);
2994                     }
2995
2996                     // We jump to here if we iterate to the point that there is insufficient input to
2997                     // run any matches, and need to return a failure state from JIT code.
2998                     matchFailed.link(this);
2999                 }
3000
3001                 lastStickyAlternativeFailures.link(this);
3002                 removeCallFrame();
3003                 generateFailReturn();
3004                 break;
3005             }
3006             case OpBodyAlternativeEnd: {
3007                 // We should never backtrack back into a body disjunction.
3008                 ASSERT(m_backtrackingState.isEmpty());
3009
3010                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
3011                 m_checkedOffset += priorAlternative->m_minimumSize;
3012                 break;
3013             }
3014
3015             // OpSimpleNestedAlternativeBegin/Next/End
3016             // OpNestedAlternativeBegin/Next/End
3017             //
3018             // Generate code for when we backtrack back out of an alternative into
3019             // a Begin or Next node, or when the entry input count check fails. If
3020             // there are more alternatives we need to jump to the next alternative,
3021             // if not we backtrack back out of the current set of parentheses.
3022             //
3023             // In the case of non-simple nested assertions we need to also link the
3024             // 'return address' appropriately to backtrack back out into the correct
3025             // alternative.
3026             case OpSimpleNestedAlternativeBegin:
3027             case OpSimpleNestedAlternativeNext:
3028             case OpNestedAlternativeBegin:
3029             case OpNestedAlternativeNext: {
3030                 YarrOp& nextOp = m_ops[op.m_nextOp];
3031                 bool isBegin = op.m_previousOp == notFound;
3032                 bool isLastAlternative = nextOp.m_nextOp == notFound;
3033                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
3034                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
3035
3036                 // Treat an input check failure the same as a failed match.
3037                 m_backtrackingState.append(op.m_jumps);
3038
3039                 // Set the backtracks to jump to the appropriate place. We may need
3040                 // to link the backtracks in one of three different way depending on
3041                 // the type of alternative we are dealing with:
3042                 //  - A single alternative, with no simplings.
3043                 //  - The last alternative of a set of two or more.
3044                 //  - An alternative other than the last of a set of two or more.
3045                 //
3046                 // In the case of a single alternative on its own, we don't need to
3047                 // jump anywhere - if the alternative fails to match we can just
3048                 // continue to backtrack out of the parentheses without jumping.
3049                 //
3050                 // In the case of the last alternative in a set of more than one, we
3051                 // need to jump to return back out to the beginning. We'll do so by
3052                 // adding a jump to the End node's m_jumps list, and linking this
3053                 // when we come to generate the Begin node. For alternatives other
3054                 // than the last, we need to jump to the next alternative.
3055                 //
3056                 // If the alternative had adjusted the input position we must link
3057                 // backtracking to here, correct, and then jump on. If not we can
3058                 // link the backtracks directly to their destination.
3059                 if (op.m_checkAdjust) {
3060                     // Handle the cases where we need to link the backtracks here.
3061                     m_backtrackingState.link(this);
3062                     sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3063                     if (!isLastAlternative) {
3064                         // An alternative that is not the last should jump to its successor.
3065                         jump(nextOp.m_reentry);
3066                     } else if (!isBegin) {
3067                         // The last of more than one alternatives must jump back to the beginning.
3068                         nextOp.m_jumps.append(jump());
3069                     } else {
3070                         // A single alternative on its own can fall through.
3071                         m_backtrackingState.fallthrough();
3072                     }
3073                 } else {
3074                     // Handle the cases where we can link the backtracks directly to their destinations.
3075                     if (!isLastAlternative) {
3076                         // An alternative that is not the last should jump to its successor.
3077                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
3078                     } else if (!isBegin) {
3079                         // The last of more than one alternatives must jump back to the beginning.
3080                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
3081                     }
3082                     // In the case of a single alternative on its own do nothing - it can fall through.
3083                 }
3084
3085                 // If there is a backtrack jump from a zero length match link it here.
3086                 if (op.m_zeroLengthMatch.isSet())
3087                     m_backtrackingState.append(op.m_zeroLengthMatch);
3088
3089                 // At this point we've handled the backtracking back into this node.
3090                 // Now link any backtracks that need to jump to here.