Versioning.
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrJIT.cpp
1 /*
2  * Copyright (C) 2009-2018 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27 #include "YarrJIT.h"
28
29 #include <wtf/ASCIICType.h>
30 #include "LinkBuffer.h"
31 #include "Options.h"
32 #include "VM.h"
33 #include "Yarr.h"
34 #include "YarrCanonicalize.h"
35 #include "YarrDisassembler.h"
36
37 #if ENABLE(YARR_JIT)
38
39 namespace JSC { namespace Yarr {
40
41 template<YarrJITCompileMode compileMode>
42 class YarrGenerator : public YarrJITInfo, private MacroAssembler {
43
44 #if CPU(ARM_THUMB2)
45     static const RegisterID input = ARMRegisters::r0;
46     static const RegisterID index = ARMRegisters::r1;
47     static const RegisterID length = ARMRegisters::r2;
48     static const RegisterID output = ARMRegisters::r3;
49
50     static const RegisterID regT0 = ARMRegisters::r4;
51     static const RegisterID regT1 = ARMRegisters::r5;
52     static const RegisterID initialStart = ARMRegisters::r8;
53
54     static const RegisterID returnRegister = ARMRegisters::r0;
55     static const RegisterID returnRegister2 = ARMRegisters::r1;
56
57 #define HAVE_INITIAL_START_REG
58 #elif CPU(ARM64)
59     // Argument registers
60     static const RegisterID input = ARM64Registers::x0;
61     static const RegisterID index = ARM64Registers::x1;
62     static const RegisterID length = ARM64Registers::x2;
63     static const RegisterID output = ARM64Registers::x3;
64     static const RegisterID freelistRegister = ARM64Registers::x4;
65     static const RegisterID freelistSizeRegister = ARM64Registers::x5;
66
67     // Scratch registers
68     static const RegisterID regT0 = ARM64Registers::x6;
69     static const RegisterID regT1 = ARM64Registers::x7;
70     static const RegisterID regT2 = ARM64Registers::x8;
71     static const RegisterID remainingMatchCount = ARM64Registers::x9;
72     static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
73     static const RegisterID initialStart = ARM64Registers::x11;
74     static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
75     static const RegisterID leadingSurrogateTag = ARM64Registers::x13;
76     static const RegisterID trailingSurrogateTag = ARM64Registers::x14;
77     static const RegisterID endOfStringAddress = ARM64Registers::x15;
78
79     static const RegisterID returnRegister = ARM64Registers::x0;
80     static const RegisterID returnRegister2 = ARM64Registers::x1;
81
82     const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
83 #define HAVE_INITIAL_START_REG
84 #define JIT_UNICODE_EXPRESSIONS
85 #elif CPU(MIPS)
86     static const RegisterID input = MIPSRegisters::a0;
87     static const RegisterID index = MIPSRegisters::a1;
88     static const RegisterID length = MIPSRegisters::a2;
89     static const RegisterID output = MIPSRegisters::a3;
90
91     static const RegisterID regT0 = MIPSRegisters::t4;
92     static const RegisterID regT1 = MIPSRegisters::t5;
93     static const RegisterID initialStart = MIPSRegisters::t6;
94
95     static const RegisterID returnRegister = MIPSRegisters::v0;
96     static const RegisterID returnRegister2 = MIPSRegisters::v1;
97
98 #define HAVE_INITIAL_START_REG
99 #elif CPU(X86)
100     static const RegisterID input = X86Registers::eax;
101     static const RegisterID index = X86Registers::edx;
102     static const RegisterID length = X86Registers::ecx;
103     static const RegisterID output = X86Registers::edi;
104
105     static const RegisterID regT0 = X86Registers::ebx;
106     static const RegisterID regT1 = X86Registers::esi;
107
108     static const RegisterID returnRegister = X86Registers::eax;
109     static const RegisterID returnRegister2 = X86Registers::edx;
110 #elif CPU(X86_64)
111 #if !OS(WINDOWS)
112     // Argument registers
113     static const RegisterID input = X86Registers::edi;
114     static const RegisterID index = X86Registers::esi;
115     static const RegisterID length = X86Registers::edx;
116     static const RegisterID output = X86Registers::ecx;
117     static const RegisterID freelistRegister = X86Registers::r8;
118     static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
119 #else
120     // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
121     // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
122     COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
123     static const RegisterID input = X86Registers::edx;
124     static const RegisterID index = X86Registers::r8;
125     static const RegisterID length = X86Registers::r9;
126     static const RegisterID output = X86Registers::r10;
127 #endif
128
129     // Scratch registers
130     static const RegisterID regT0 = X86Registers::eax;
131 #if !OS(WINDOWS)
132     static const RegisterID regT1 = X86Registers::r9;
133     static const RegisterID regT2 = X86Registers::r10;
134 #else
135     static const RegisterID regT1 = X86Registers::ecx;
136     static const RegisterID regT2 = X86Registers::edi;
137 #endif
138
139     static const RegisterID initialStart = X86Registers::ebx;
140 #if !OS(WINDOWS)
141     static const RegisterID remainingMatchCount = X86Registers::r12;
142 #else
143     static const RegisterID remainingMatchCount = X86Registers::esi;
144 #endif
145     static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
146     static const RegisterID leadingSurrogateTag = X86Registers::r14;
147     static const RegisterID endOfStringAddress = X86Registers::r15;
148
149     static const RegisterID returnRegister = X86Registers::eax;
150     static const RegisterID returnRegister2 = X86Registers::edx;
151
152     const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
153     const TrustedImm32 trailingSurrogateTag = TrustedImm32(0xdc00);
154     const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
155 #define HAVE_INITIAL_START_REG
156 #define JIT_UNICODE_EXPRESSIONS
157 #endif
158
159 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
160     struct ParenContextSizes {
161         size_t m_numSubpatterns;
162         size_t m_frameSlots;
163
164         ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
165             : m_numSubpatterns(numSubpatterns)
166             , m_frameSlots(frameSlots)
167         {
168         }
169
170         size_t numSubpatterns() { return m_numSubpatterns; }
171
172         size_t frameSlots() { return m_frameSlots; }
173     };
174
175     struct ParenContext {
176         struct ParenContext* next;
177         uint32_t begin;
178         uint32_t matchAmount;
179         uintptr_t returnAddress;
180         struct Subpatterns {
181             unsigned start;
182             unsigned end;
183         } subpatterns[0];
184         uintptr_t frameSlots[0];
185
186         static size_t sizeFor(ParenContextSizes& parenContextSizes)
187         {
188             return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
189         }
190
191         static ptrdiff_t nextOffset()
192         {
193             return offsetof(ParenContext, next);
194         }
195
196         static ptrdiff_t beginOffset()
197         {
198             return offsetof(ParenContext, begin);
199         }
200
201         static ptrdiff_t matchAmountOffset()
202         {
203             return offsetof(ParenContext, matchAmount);
204         }
205
206         static ptrdiff_t returnAddressOffset()
207         {
208             return offsetof(ParenContext, returnAddress);
209         }
210
211         static ptrdiff_t subpatternOffset(size_t subpattern)
212         {
213             return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
214         }
215
216         static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
217         {
218             return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
219         }
220     };
221
222     void initParenContextFreeList()
223     {
224         RegisterID parenContextPointer = regT0;
225         RegisterID nextParenContextPointer = regT2;
226
227         size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);
228
229         parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);
230
231         // Check that the paren context is a reasonable size.
232         if (parenContextSize > INT16_MAX)
233             m_abortExecution.append(jump());
234
235         Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
236         move(freelistRegister, parenContextPointer);
237         addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
238         addPtr(freelistRegister, freelistSizeRegister);
239         subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);
240
241         Label loopTop(this);
242         Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
243         storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
244         move(nextParenContextPointer, parenContextPointer);
245         addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
246         jump(loopTop);
247
248         initDone.link(this);
249         storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
250         emptyFreeList.link(this);
251     }
252
253     void allocateParenContext(RegisterID result)
254     {
255         m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
256         sub32(TrustedImm32(1), remainingMatchCount);
257         m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
258         move(freelistRegister, result);
259         loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
260     }
261
262     void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
263     {
264         loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
265         storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
266         move(headPtrRegister, freelistRegister);
267     }
268
269     void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
270     {
271         store32(index, Address(parenContextReg, ParenContext::beginOffset()));
272         loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
273         store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
274         loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
275         storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
276         if (compileMode == IncludeSubpatterns) {
277             for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
278                 loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
279                 storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
280                 clearSubpatternStart(subpattern);
281             }
282         }
283         subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
284         for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
285             loadFromFrame(frameLocation, tempReg);
286             storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
287         }
288     }
289
290     void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
291     {
292         load32(Address(parenContextReg, ParenContext::beginOffset()), index);
293         storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
294         load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
295         storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
296         loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
297         storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
298         if (compileMode == IncludeSubpatterns) {
299             for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
300                 loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
301                 storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
302             }
303         }
304         subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
305         for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
306             loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
307             storeToFrame(tempReg, frameLocation);
308         }
309     }
310 #endif
311
312     void optimizeAlternative(PatternAlternative* alternative)
313     {
314         if (!alternative->m_terms.size())
315             return;
316
317         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
318             PatternTerm& term = alternative->m_terms[i];
319             PatternTerm& nextTerm = alternative->m_terms[i + 1];
320
321             // We can move BMP only character classes after fixed character terms.
322             if ((term.type == PatternTerm::TypeCharacterClass)
323                 && (term.quantityType == QuantifierFixedCount)
324                 && (!m_decodeSurrogatePairs || (term.characterClass->hasOneCharacterSize() && !term.m_invert))
325                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
326                 && (nextTerm.quantityType == QuantifierFixedCount)) {
327                 PatternTerm termCopy = term;
328                 alternative->m_terms[i] = nextTerm;
329                 alternative->m_terms[i + 1] = termCopy;
330             }
331         }
332     }
333
334     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
335     {
336         do {
337             // pick which range we're going to generate
338             int which = count >> 1;
339             char lo = ranges[which].begin;
340             char hi = ranges[which].end;
341
342             // check if there are any ranges or matches below lo.  If not, just jl to failure -
343             // if there is anything else to check, check that first, if it falls through jmp to failure.
344             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
345                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
346
347                 // generate code for all ranges before this one
348                 if (which)
349                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
350
351                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
352                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
353                     ++*matchIndex;
354                 }
355                 failures.append(jump());
356
357                 loOrAbove.link(this);
358             } else if (which) {
359                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
360
361                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
362                 failures.append(jump());
363
364                 loOrAbove.link(this);
365             } else
366                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
367
368             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
369                 ++*matchIndex;
370
371             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
372             // fall through to here, the value is above hi.
373
374             // shuffle along & loop around if there are any more matches to handle.
375             unsigned next = which + 1;
376             ranges += next;
377             count -= next;
378         } while (count);
379     }
380
381     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
382     {
383         if (charClass->m_table && !m_decodeSurrogatePairs) {
384             ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
385             matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
386             return;
387         }
388
389         JumpList unicodeFail;
390         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
391             JumpList isAscii;
392             if (charClass->m_matches.size() || charClass->m_ranges.size())
393                 isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f)));
394
395             if (charClass->m_matchesUnicode.size()) {
396                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
397                     UChar32 ch = charClass->m_matchesUnicode[i];
398                     matchDest.append(branch32(Equal, character, Imm32(ch)));
399                 }
400             }
401
402             if (charClass->m_rangesUnicode.size()) {
403                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
404                     UChar32 lo = charClass->m_rangesUnicode[i].begin;
405                     UChar32 hi = charClass->m_rangesUnicode[i].end;
406
407                     Jump below = branch32(LessThan, character, Imm32(lo));
408                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
409                     below.link(this);
410                 }
411             }
412
413             if (charClass->m_matches.size() || charClass->m_ranges.size())
414                 unicodeFail = jump();
415             isAscii.link(this);
416         }
417
418         if (charClass->m_ranges.size()) {
419             unsigned matchIndex = 0;
420             JumpList failures;
421             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
422             while (matchIndex < charClass->m_matches.size())
423                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
424
425             failures.link(this);
426         } else if (charClass->m_matches.size()) {
427             // optimization: gather 'a','A' etc back together, can mask & test once.
428             Vector<char> matchesAZaz;
429
430             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
431                 char ch = charClass->m_matches[i];
432                 if (m_pattern.ignoreCase()) {
433                     if (isASCIILower(ch)) {
434                         matchesAZaz.append(ch);
435                         continue;
436                     }
437                     if (isASCIIUpper(ch))
438                         continue;
439                 }
440                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
441             }
442
443             if (unsigned countAZaz = matchesAZaz.size()) {
444                 or32(TrustedImm32(32), character);
445                 for (unsigned i = 0; i < countAZaz; ++i)
446                     matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
447             }
448         }
449
450         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
451             unicodeFail.link(this);
452     }
453
454 #ifdef JIT_UNICODE_EXPRESSIONS
455     void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failures, const RegisterID character)
456     {
457         ASSERT(term->type == PatternTerm::TypeCharacterClass);
458
459         if (term->characterClass->hasOneCharacterSize() && !term->invert())
460             add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
461         else {
462             add32(TrustedImm32(1), index);
463             failures.append(atEndOfInput());
464             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
465             add32(TrustedImm32(1), index);
466             isBMPChar.link(this);
467         }
468     }
469 #endif
470
471     // Jumps if input not available; will have (incorrectly) incremented already!
472     Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
473     {
474         if (countToCheck)
475             add32(Imm32(countToCheck), index);
476         return branch32(Above, index, length);
477     }
478
479     Jump jumpIfAvailableInput(unsigned countToCheck)
480     {
481         add32(Imm32(countToCheck), index);
482         return branch32(BelowOrEqual, index, length);
483     }
484
485     Jump checkNotEnoughInput(RegisterID additionalAmount)
486     {
487         add32(index, additionalAmount);
488         return branch32(Above, additionalAmount, length);
489     }
490
491     Jump checkInput()
492     {
493         return branch32(BelowOrEqual, index, length);
494     }
495
496     Jump atEndOfInput()
497     {
498         return branch32(Equal, index, length);
499     }
500
501     Jump notAtEndOfInput()
502     {
503         return branch32(NotEqual, index, length);
504     }
505
506     BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
507     {
508         RegisterID base = input;
509
510         // BaseIndex() addressing can take a int32_t offset. Given that we can have a regular
511         // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
512         // for addressing in extreme cases where we might underflow. Therefore we check to see if
513         // negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
514         // If so, we do our own address calculating by adjusting the base, using the result register
515         // as a temp address register.
516         unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff;
517         unsigned offsetAdjustAmount = 0x40000000;
518         if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
519             base = tempReg;
520             move(input, base);
521             while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
522                 subPtr(TrustedImm32(offsetAdjustAmount), base);
523                 if (m_charSize != Char8)
524                     subPtr(TrustedImm32(offsetAdjustAmount), base);
525                 negativeCharacterOffset -= offsetAdjustAmount;
526             }
527         }
528
529         Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));
530
531         if (m_charSize == Char8)
532             return BaseIndex(input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());
533
534         return BaseIndex(input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
535     }
536
537 #ifdef JIT_UNICODE_EXPRESSIONS
538     void tryReadUnicodeCharImpl(RegisterID resultReg)
539     {
540         ASSERT(m_charSize == Char16);
541
542         JumpList notUnicode;
543
544         load16Unaligned(regUnicodeInputAndTrail, resultReg);
545         and32(surrogateTagMask, resultReg, regT2);
546         notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
547         addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
548         notUnicode.append(branchPtr(AboveOrEqual, regUnicodeInputAndTrail, endOfStringAddress));
549         load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
550         and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
551         notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
552         sub32(leadingSurrogateTag, resultReg);
553         sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
554         lshift32(TrustedImm32(10), resultReg);
555         or32(regUnicodeInputAndTrail, resultReg);
556         add32(supplementaryPlanesBase, resultReg);
557         notUnicode.link(this);
558     }
559
560     void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
561     {
562         ASSERT(m_charSize == Char16);
563
564         getEffectiveAddress(address, regUnicodeInputAndTrail);
565
566         if (resultReg == regT0)
567             m_tryReadUnicodeCharacterCalls.append(nearCall());
568         else
569             tryReadUnicodeCharImpl(resultReg);
570     }
571 #endif
572
573     void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
574     {
575         BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
576         
577         if (m_charSize == Char8)
578             load8(address, resultReg);
579         else
580             load16Unaligned(address, resultReg);
581     }
582     
583     void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
584     {
585         BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
586
587         if (m_charSize == Char8)
588             load8(address, resultReg);
589 #ifdef JIT_UNICODE_EXPRESSIONS
590         else if (m_decodeSurrogatePairs)
591             tryReadUnicodeChar(address, resultReg);
592 #endif
593         else
594             load16Unaligned(address, resultReg);
595     }
596
597     Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
598     {
599         readCharacter(negativeCharacterOffset, character);
600
601         // For case-insesitive compares, non-ascii characters that have different
602         // upper & lower case representations are converted to a character class.
603         ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
604         if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
605             or32(TrustedImm32(0x20), character);
606             ch |= 0x20;
607         }
608
609         return branch32(NotEqual, character, Imm32(ch));
610     }
611     
612     void storeToFrame(RegisterID reg, unsigned frameLocation)
613     {
614         poke(reg, frameLocation);
615     }
616
617     void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
618     {
619         poke(imm, frameLocation);
620     }
621
622 #if CPU(ARM64) || CPU(X86_64)
623     void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
624     {
625         poke(imm, frameLocation);
626     }
627 #endif
628
629     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
630     {
631         return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*)));
632     }
633
634     void loadFromFrame(unsigned frameLocation, RegisterID reg)
635     {
636         peek(reg, frameLocation);
637     }
638
639     void loadFromFrameAndJump(unsigned frameLocation)
640     {
641         jump(Address(stackPointerRegister, frameLocation * sizeof(void*)), YarrBacktrackPtrTag);
642     }
643
644     unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
645     {
646         if (!callFrameSize)
647             return 0;
648
649         callFrameSize *= sizeof(void*);
650         if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
651             CRASH();
652         callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
653         return callFrameSize;
654     }
655     void initCallFrame()
656     {
657         unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
658         if (callFrameSizeInBytes) {
659 #if CPU(X86_64) || CPU(ARM64)
660             if (Options::zeroStackFrame()) {
661                 // We need to start from the stack pointer, because we could have spilled callee saves
662                 move(stackPointerRegister, regT0);
663                 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
664                 if (callFrameSizeInBytes <= 128) {
665                     for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
666                         storePtr(TrustedImm32(0), Address(regT0, -8 - offset));
667                 } else {
668                     Label zeroLoop = label();
669                     subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0);
670 #if CPU(ARM64)
671                     storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
672 #else
673                     storePtr(TrustedImm32(0), Address(regT0));
674                     storePtr(TrustedImm32(0), Address(regT0, sizeof(intptr_t)));
675 #endif
676                     branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
677                 }
678             } else
679 #endif
680                 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
681
682         }
683     }
684     void removeCallFrame()
685     {
686         unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
687         if (callFrameSizeInBytes)
688             addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
689     }
690
691     void generateFailReturn()
692     {
693         move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
694         move(TrustedImm32(0), returnRegister2);
695         generateReturn();
696     }
697
698     void generateJITFailReturn()
699     {
700         if (m_abortExecution.empty() && m_hitMatchLimit.empty())
701             return;
702
703         JumpList finishExiting;
704         if (!m_abortExecution.empty()) {
705             m_abortExecution.link(this);
706             move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister);
707             finishExiting.append(jump());
708         }
709
710         if (!m_hitMatchLimit.empty()) {
711             m_hitMatchLimit.link(this);
712             move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister);
713         }
714
715         finishExiting.link(this);
716         removeCallFrame();
717         move(TrustedImm32(0), returnRegister2);
718         generateReturn();
719     }
720
721     // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
722     void setSubpatternStart(RegisterID reg, unsigned subpattern)
723     {
724         ASSERT(subpattern);
725         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
726         store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
727     }
728     void setSubpatternEnd(RegisterID reg, unsigned subpattern)
729     {
730         ASSERT(subpattern);
731         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
732         store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
733     }
734     void clearSubpatternStart(unsigned subpattern)
735     {
736         ASSERT(subpattern);
737         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
738         store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
739     }
740
741     void clearMatches(unsigned subpattern, unsigned lastSubpattern)
742     {
743         for (; subpattern <= lastSubpattern; subpattern++)
744             clearSubpatternStart(subpattern);
745     }
746
747     // We use one of three different strategies to track the start of the current match,
748     // while matching.
749     // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
750     //    at the end of matching. This is irrespective of compileMode, and in this case
751     //    these methods should never be called.
752     // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
753     //    vector, store the match start in the output vector.
754     // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
755     //    in this register.
756     void setMatchStart(RegisterID reg)
757     {
758         ASSERT(!m_pattern.m_body->m_hasFixedSize);
759         if (compileMode == IncludeSubpatterns)
760             store32(reg, output);
761         else
762             move(reg, output);
763     }
764     void getMatchStart(RegisterID reg)
765     {
766         ASSERT(!m_pattern.m_body->m_hasFixedSize);
767         if (compileMode == IncludeSubpatterns)
768             load32(output, reg);
769         else
770             move(output, reg);
771     }
772
773     enum YarrOpCode {
774         // These nodes wrap body alternatives - those in the main disjunction,
775         // rather than subpatterns or assertions. These are chained together in
776         // a doubly linked list, with a 'begin' node for the first alternative,
777         // a 'next' node for each subsequent alternative, and an 'end' node at
778         // the end. In the case of repeating alternatives, the 'end' node also
779         // has a reference back to 'begin'.
780         OpBodyAlternativeBegin,
781         OpBodyAlternativeNext,
782         OpBodyAlternativeEnd,
783         // Similar to the body alternatives, but used for subpatterns with two
784         // or more alternatives.
785         OpNestedAlternativeBegin,
786         OpNestedAlternativeNext,
787         OpNestedAlternativeEnd,
788         // Used for alternatives in subpatterns where there is only a single
789         // alternative (backtracking is easier in these cases), or for alternatives
790         // which never need to be backtracked (those in parenthetical assertions,
791         // terminal subpatterns).
792         OpSimpleNestedAlternativeBegin,
793         OpSimpleNestedAlternativeNext,
794         OpSimpleNestedAlternativeEnd,
795         // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
796         OpParenthesesSubpatternOnceBegin,
797         OpParenthesesSubpatternOnceEnd,
798         // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
799         OpParenthesesSubpatternTerminalBegin,
800         OpParenthesesSubpatternTerminalEnd,
801         // Used to wrap generic captured matches
802         OpParenthesesSubpatternBegin,
803         OpParenthesesSubpatternEnd,
804         // Used to wrap parenthetical assertions.
805         OpParentheticalAssertionBegin,
806         OpParentheticalAssertionEnd,
807         // Wraps all simple terms (pattern characters, character classes).
808         OpTerm,
809         // Where an expression contains only 'once through' body alternatives
810         // and no repeating ones, this op is used to return match failure.
811         OpMatchFailed
812     };
813
814     // This structure is used to hold the compiled opcode information,
815     // including reference back to the original PatternTerm/PatternAlternatives,
816     // and JIT compilation data structures.
817     struct YarrOp {
818         explicit YarrOp(PatternTerm* term)
819             : m_op(OpTerm)
820             , m_term(term)
821             , m_isDeadCode(false)
822         {
823         }
824
825         explicit YarrOp(YarrOpCode op)
826             : m_op(op)
827             , m_isDeadCode(false)
828         {
829         }
830
831         // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
832         YarrOpCode m_op;
833         PatternTerm* m_term;
834
835         // For alternatives, this holds the PatternAlternative and doubly linked
836         // references to this alternative's siblings. In the case of the
837         // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
838         // m_nextOp will reference the OpBodyAlternativeBegin node of the first
839         // repeating alternative.
840         PatternAlternative* m_alternative;
841         size_t m_previousOp;
842         size_t m_nextOp;
843
844         // Used to record a set of Jumps out of the generated code, typically
845         // used for jumps out to backtracking code, and a single reentry back
846         // into the code for a node (likely where a backtrack will trigger
847         // rematching).
848         Label m_reentry;
849         JumpList m_jumps;
850
851         // Used for backtracking when the prior alternative did not consume any
852         // characters but matched.
853         Jump m_zeroLengthMatch;
854
855         // This flag is used to null out the second pattern character, when
856         // two are fused to match a pair together.
857         bool m_isDeadCode;
858
859         // Currently used in the case of some of the more complex management of
860         // 'm_checkedOffset', to cache the offset used in this alternative, to avoid
861         // recalculating it.
862         Checked<unsigned> m_checkAdjust;
863
864         // Used by OpNestedAlternativeNext/End to hold the pointer to the
865         // value that will be pushed into the pattern's frame to return to,
866         // upon backtracking back into the disjunction.
867         DataLabelPtr m_returnAddress;
868     };
869
870     // BacktrackingState
871     // This class encapsulates information about the state of code generation
872     // whilst generating the code for backtracking, when a term fails to match.
873     // Upon entry to code generation of the backtracking code for a given node,
874     // the Backtracking state will hold references to all control flow sources
875     // that are outputs in need of further backtracking from the prior node
876     // generated (which is the subsequent operation in the regular expression,
877     // and in the m_ops Vector, since we generated backtracking backwards).
878     // These references to control flow take the form of:
879     //  - A jump list of jumps, to be linked to code that will backtrack them
880     //    further.
881     //  - A set of DataLabelPtr values, to be populated with values to be
882     //    treated effectively as return addresses backtracking into complex
883     //    subpatterns.
884     //  - A flag indicating that the current sequence of generated code up to
885     //    this point requires backtracking.
886     class BacktrackingState {
887     public:
888         BacktrackingState()
889             : m_pendingFallthrough(false)
890         {
891         }
892
893         // Add a jump or jumps, a return address, or set the flag indicating
894         // that the current 'fallthrough' control flow requires backtracking.
895         void append(const Jump& jump)
896         {
897             m_laterFailures.append(jump);
898         }
899         void append(JumpList& jumpList)
900         {
901             m_laterFailures.append(jumpList);
902         }
903         void append(const DataLabelPtr& returnAddress)
904         {
905             m_pendingReturns.append(returnAddress);
906         }
907         void fallthrough()
908         {
909             ASSERT(!m_pendingFallthrough);
910             m_pendingFallthrough = true;
911         }
912
913         // These methods clear the backtracking state, either linking to the
914         // current location, a provided label, or copying the backtracking out
915         // to a JumpList. All actions may require code generation to take place,
916         // and as such are passed a pointer to the assembler.
917         void link(MacroAssembler* assembler)
918         {
919             if (m_pendingReturns.size()) {
920                 Label here(assembler);
921                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
922                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
923                 m_pendingReturns.clear();
924             }
925             m_laterFailures.link(assembler);
926             m_laterFailures.clear();
927             m_pendingFallthrough = false;
928         }
929         void linkTo(Label label, MacroAssembler* assembler)
930         {
931             if (m_pendingReturns.size()) {
932                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
933                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
934                 m_pendingReturns.clear();
935             }
936             if (m_pendingFallthrough)
937                 assembler->jump(label);
938             m_laterFailures.linkTo(label, assembler);
939             m_laterFailures.clear();
940             m_pendingFallthrough = false;
941         }
942         void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
943         {
944             if (m_pendingReturns.size()) {
945                 Label here(assembler);
946                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
947                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
948                 m_pendingReturns.clear();
949                 m_pendingFallthrough = true;
950             }
951             if (m_pendingFallthrough)
952                 jumpList.append(assembler->jump());
953             jumpList.append(m_laterFailures);
954             m_laterFailures.clear();
955             m_pendingFallthrough = false;
956         }
957
958         bool isEmpty()
959         {
960             return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
961         }
962
963         // Called at the end of code generation to link all return addresses.
964         void linkDataLabels(LinkBuffer& linkBuffer)
965         {
966             ASSERT(isEmpty());
967             for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
968                 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf<YarrBacktrackPtrTag>(m_backtrackRecords[i].m_backtrackLocation));
969         }
970
971     private:
972         struct ReturnAddressRecord {
973             ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
974                 : m_dataLabel(dataLabel)
975                 , m_backtrackLocation(backtrackLocation)
976             {
977             }
978
979             DataLabelPtr m_dataLabel;
980             Label m_backtrackLocation;
981         };
982
983         JumpList m_laterFailures;
984         bool m_pendingFallthrough;
985         Vector<DataLabelPtr, 4> m_pendingReturns;
986         Vector<ReturnAddressRecord, 4> m_backtrackRecords;
987     };
988
989     // Generation methods:
990     // ===================
991
992     // This method provides a default implementation of backtracking common
993     // to many terms; terms commonly jump out of the forwards  matching path
994     // on any failed conditions, and add these jumps to the m_jumps list. If
995     // no special handling is required we can often just backtrack to m_jumps.
996     void backtrackTermDefault(size_t opIndex)
997     {
998         YarrOp& op = m_ops[opIndex];
999         m_backtrackingState.append(op.m_jumps);
1000     }
1001
1002     void generateAssertionBOL(size_t opIndex)
1003     {
1004         YarrOp& op = m_ops[opIndex];
1005         PatternTerm* term = op.m_term;
1006
1007         if (m_pattern.multiline()) {
1008             const RegisterID character = regT0;
1009
1010             JumpList matchDest;
1011             if (!term->inputPosition)
1012                 matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));
1013
1014             readCharacter(m_checkedOffset - term->inputPosition + 1, character);
1015             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1016             op.m_jumps.append(jump());
1017
1018             matchDest.link(this);
1019         } else {
1020             // Erk, really should poison out these alternatives early. :-/
1021             if (term->inputPosition)
1022                 op.m_jumps.append(jump());
1023             else
1024                 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
1025         }
1026     }
1027     void backtrackAssertionBOL(size_t opIndex)
1028     {
1029         backtrackTermDefault(opIndex);
1030     }
1031
1032     void generateAssertionEOL(size_t opIndex)
1033     {
1034         YarrOp& op = m_ops[opIndex];
1035         PatternTerm* term = op.m_term;
1036
1037         if (m_pattern.multiline()) {
1038             const RegisterID character = regT0;
1039
1040             JumpList matchDest;
1041             if (term->inputPosition == m_checkedOffset.unsafeGet())
1042                 matchDest.append(atEndOfInput());
1043
1044             readCharacter(m_checkedOffset - term->inputPosition, character);
1045             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1046             op.m_jumps.append(jump());
1047
1048             matchDest.link(this);
1049         } else {
1050             if (term->inputPosition == m_checkedOffset.unsafeGet())
1051                 op.m_jumps.append(notAtEndOfInput());
1052             // Erk, really should poison out these alternatives early. :-/
1053             else
1054                 op.m_jumps.append(jump());
1055         }
1056     }
1057     void backtrackAssertionEOL(size_t opIndex)
1058     {
1059         backtrackTermDefault(opIndex);
1060     }
1061
1062     // Also falls though on nextIsNotWordChar.
1063     void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
1064     {
1065         YarrOp& op = m_ops[opIndex];
1066         PatternTerm* term = op.m_term;
1067
1068         const RegisterID character = regT0;
1069
1070         if (term->inputPosition == m_checkedOffset.unsafeGet())
1071             nextIsNotWordChar.append(atEndOfInput());
1072
1073         readCharacter(m_checkedOffset - term->inputPosition, character);
1074
1075         CharacterClass* wordcharCharacterClass;
1076
1077         if (m_unicodeIgnoreCase)
1078             wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1079         else
1080             wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1081
1082         matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass);
1083     }
1084
1085     void generateAssertionWordBoundary(size_t opIndex)
1086     {
1087         YarrOp& op = m_ops[opIndex];
1088         PatternTerm* term = op.m_term;
1089
1090         const RegisterID character = regT0;
1091
1092         Jump atBegin;
1093         JumpList matchDest;
1094         if (!term->inputPosition)
1095             atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
1096         readCharacter(m_checkedOffset - term->inputPosition + 1, character);
1097
1098         CharacterClass* wordcharCharacterClass;
1099
1100         if (m_unicodeIgnoreCase)
1101             wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1102         else
1103             wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1104
1105         matchCharacterClass(character, matchDest, wordcharCharacterClass);
1106         if (!term->inputPosition)
1107             atBegin.link(this);
1108
1109         // We fall through to here if the last character was not a wordchar.
1110         JumpList nonWordCharThenWordChar;
1111         JumpList nonWordCharThenNonWordChar;
1112         if (term->invert()) {
1113             matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
1114             nonWordCharThenWordChar.append(jump());
1115         } else {
1116             matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
1117             nonWordCharThenNonWordChar.append(jump());
1118         }
1119         op.m_jumps.append(nonWordCharThenNonWordChar);
1120
1121         // We jump here if the last character was a wordchar.
1122         matchDest.link(this);
1123         JumpList wordCharThenWordChar;
1124         JumpList wordCharThenNonWordChar;
1125         if (term->invert()) {
1126             matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
1127             wordCharThenWordChar.append(jump());
1128         } else {
1129             matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
1130             // This can fall-though!
1131         }
1132
1133         op.m_jumps.append(wordCharThenWordChar);
1134
1135         nonWordCharThenWordChar.link(this);
1136         wordCharThenNonWordChar.link(this);
1137     }
1138     void backtrackAssertionWordBoundary(size_t opIndex)
1139     {
1140         backtrackTermDefault(opIndex);
1141     }
1142
1143 #if ENABLE(YARR_JIT_BACKREFERENCES)
1144     void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
1145     {
1146         YarrOp& op = m_ops[opIndex];
1147         PatternTerm* term = op.m_term;
1148         unsigned subpatternId = term->backReferenceSubpatternId;
1149
1150         Label loop(this);
1151
1152         readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
1153         readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);
1154     
1155         if (!m_pattern.ignoreCase())
1156             characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1157         else {
1158             Jump charactersMatch = branch32(Equal, character, patternCharacter);
1159             ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1160             load16(characterTableEntry, character);
1161             ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1162             load16(patternTableEntry, patternCharacter);
1163             characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1164             charactersMatch.link(this);
1165         }
1166
1167         
1168         add32(TrustedImm32(1), index);
1169         add32(TrustedImm32(1), patternIndex);
1170         
1171         branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
1172     }
1173
1174     void generateBackReference(size_t opIndex)
1175     {
1176         YarrOp& op = m_ops[opIndex];
1177         PatternTerm* term = op.m_term;
1178
1179         if (m_pattern.ignoreCase() && m_charSize != Char8) {
1180             m_failureReason = JITFailureReason::BackReference;
1181             return;
1182         }
1183
1184         unsigned subpatternId = term->backReferenceSubpatternId;
1185         unsigned parenthesesFrameLocation = term->frameLocation;
1186
1187         const RegisterID characterOrTemp = regT0;
1188         const RegisterID patternIndex = regT1;
1189         const RegisterID patternTemp = regT2;
1190
1191         storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1192         if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
1193             storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1194
1195         JumpList matches;
1196
1197         if (term->quantityType != QuantifierNonGreedy) {
1198             load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1199             load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1200
1201             // An empty match is successful without consuming characters
1202             if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
1203                 matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
1204                 matches.append(branch32(Equal, patternIndex, patternTemp));
1205             } else {
1206                 Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
1207                 Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1208                 zeroLengthMatch.link(this);
1209                 storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1210                 matches.append(jump());
1211                 tryNonZeroMatch.link(this);
1212             }
1213         }
1214
1215         switch (term->quantityType) {
1216         case QuantifierFixedCount: {
1217             Label outerLoop(this);
1218
1219             // PatternTemp should contain pattern end index at this point
1220             sub32(patternIndex, patternTemp);
1221             op.m_jumps.append(checkNotEnoughInput(patternTemp));
1222
1223             matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);
1224
1225             if (term->quantityMaxCount != 1) {
1226                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
1227                 add32(TrustedImm32(1), characterOrTemp);
1228                 storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1229                 matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
1230                 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1231                 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1232                 jump(outerLoop);
1233             }
1234             matches.link(this);
1235             break;
1236         }
1237
1238         case QuantifierGreedy: {
1239             JumpList incompleteMatches;
1240
1241             Label outerLoop(this);
1242
1243             // PatternTemp should contain pattern end index at this point
1244             sub32(patternIndex, patternTemp);
1245             matches.append(checkNotEnoughInput(patternTemp));
1246
1247             matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
1248
1249             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
1250             add32(TrustedImm32(1), characterOrTemp);
1251             storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1252             if (term->quantityMaxCount != quantifyInfinite)
1253                 matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
1254             load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1255             load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1256
1257             // Store current index in frame for restoring after a partial match
1258             storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1259             jump(outerLoop);
1260
1261             incompleteMatches.link(this);
1262             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1263
1264             matches.link(this);
1265             op.m_reentry = label();
1266             break;
1267         }
1268
1269         case QuantifierNonGreedy: {
1270             JumpList incompleteMatches;
1271
1272             matches.append(jump());
1273
1274             op.m_reentry = label();
1275
1276             load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1277             load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1278
1279             // An empty match is successful without consuming characters
1280             Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
1281             Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1282             zeroLengthMatch.link(this);
1283             storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1284             matches.append(jump());
1285             tryNonZeroMatch.link(this);
1286
1287             // Check if we have input remaining to match
1288             sub32(patternIndex, patternTemp);
1289             matches.append(checkNotEnoughInput(patternTemp));
1290
1291             storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1292
1293             matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
1294
1295             matches.append(jump());
1296
1297             incompleteMatches.link(this);
1298             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1299
1300             matches.link(this);
1301             break;
1302         }
1303         }
1304     }
1305     void backtrackBackReference(size_t opIndex)
1306     {
1307         YarrOp& op = m_ops[opIndex];
1308         PatternTerm* term = op.m_term;
1309
1310         unsigned subpatternId = term->backReferenceSubpatternId;
1311
1312         m_backtrackingState.link(this);
1313         op.m_jumps.link(this);
1314
1315         JumpList failures;
1316
1317         unsigned parenthesesFrameLocation = term->frameLocation;
1318         switch (term->quantityType) {
1319         case QuantifierFixedCount:
1320             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1321             break;
1322
1323         case QuantifierGreedy: {
1324             const RegisterID matchAmount = regT0;
1325             const RegisterID patternStartIndex = regT1;
1326             const RegisterID patternEndIndexOrLen = regT2;
1327
1328             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
1329             failures.append(branchTest32(Zero, matchAmount));
1330
1331             load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
1332             load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
1333             sub32(patternStartIndex, patternEndIndexOrLen);
1334             sub32(patternEndIndexOrLen, index);
1335
1336             sub32(TrustedImm32(1), matchAmount);
1337             storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1338             jump(op.m_reentry);
1339             break;
1340         }
1341
1342         case QuantifierNonGreedy: {
1343             const RegisterID matchAmount = regT0;
1344
1345             failures.append(atEndOfInput());
1346             loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
1347             if (term->quantityMaxCount != quantifyInfinite)
1348                 failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
1349             add32(TrustedImm32(1), matchAmount);
1350             storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1351             jump(op.m_reentry);
1352             break;
1353         }
1354         }
1355         failures.link(this);
1356         m_backtrackingState.fallthrough();
1357     }
1358 #endif
1359
1360     void generatePatternCharacterOnce(size_t opIndex)
1361     {
1362         YarrOp& op = m_ops[opIndex];
1363
1364         if (op.m_isDeadCode)
1365             return;
1366         
1367         // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
1368         // node, so there must always be at least one more node.
1369         ASSERT(opIndex + 1 < m_ops.size());
1370         YarrOp* nextOp = &m_ops[opIndex + 1];
1371
1372         PatternTerm* term = op.m_term;
1373         UChar32 ch = term->patternCharacter;
1374
1375         if ((ch > 0xff) && (m_charSize == Char8)) {
1376             // Have a 16 bit pattern character and an 8 bit string - short circuit
1377             op.m_jumps.append(jump());
1378             return;
1379         }
1380
1381         const RegisterID character = regT0;
1382 #if CPU(X86_64) || CPU(ARM64)
1383         unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
1384 #else
1385         unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
1386 #endif
1387         uint64_t ignoreCaseMask = 0;
1388 #if CPU(BIG_ENDIAN)
1389         uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
1390 #else
1391         uint64_t allCharacters = ch;
1392 #endif
1393         unsigned numberCharacters;
1394         unsigned startTermPosition = term->inputPosition;
1395
1396         // For case-insesitive compares, non-ascii characters that have different
1397         // upper & lower case representations are converted to a character class.
1398         ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1399
1400         if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1401 #if CPU(BIG_ENDIAN)
1402             ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
1403 #else
1404             ignoreCaseMask |= 32;
1405 #endif
1406         }
1407
1408         for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
1409             PatternTerm* nextTerm = nextOp->m_term;
1410
1411             // YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
1412             // Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
1413             if (nextTerm->type != PatternTerm::TypePatternCharacter
1414                 || nextTerm->quantityType != QuantifierFixedCount
1415                 || nextTerm->quantityMaxCount != 1
1416                 || nextTerm->inputPosition != (startTermPosition + numberCharacters)
1417                 || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs))
1418                 break;
1419
1420             nextOp->m_isDeadCode = true;
1421
1422 #if CPU(BIG_ENDIAN)
1423             int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
1424 #else
1425             int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
1426 #endif
1427
1428             UChar32 currentCharacter = nextTerm->patternCharacter;
1429
1430             if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
1431                 // Have a 16 bit pattern character and an 8 bit string - short circuit
1432                 op.m_jumps.append(jump());
1433                 return;
1434             }
1435
1436             // For case-insesitive compares, non-ascii characters that have different
1437             // upper & lower case representations are converted to a character class.
1438             ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));
1439
1440             allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);
1441
1442             if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
1443                 ignoreCaseMask |= 32ULL << shiftAmount;
1444         }
1445
1446         if (m_decodeSurrogatePairs)
1447             op.m_jumps.append(jumpIfNoAvailableInput());
1448
1449         if (m_charSize == Char8) {
1450             auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1451                 op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
1452             };
1453
1454             auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
1455                 load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
1456                 if (mask)
1457                     or32(Imm32(mask), character);
1458                 op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1459             };
1460
1461             auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1462                 if (mask) {
1463                     load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
1464                     if (mask)
1465                         or32(Imm32(mask), character);
1466                     op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1467                     return;
1468                 }
1469                 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
1470             };
1471
1472 #if CPU(X86_64) || CPU(ARM64)
1473             auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1474                 load64(negativeOffsetIndexedAddress(offset, character), character);
1475                 if (mask)
1476                     or64(TrustedImm64(mask), character);
1477                 op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
1478             };
1479 #endif
1480
1481             switch (numberCharacters) {
1482             case 1:
1483                 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1484                 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1485                 return;
1486             case 2: {
1487                 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1488                 return;
1489             }
1490             case 3: {
1491                 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1492                 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
1493                 return;
1494             }
1495             case 4: {
1496                 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1497                 return;
1498             }
1499 #if CPU(X86_64) || CPU(ARM64)
1500             case 5: {
1501                 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1502                 check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
1503                 return;
1504             }
1505             case 6: {
1506                 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1507                 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1508                 return;
1509             }
1510             case 7: {
1511                 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1512                 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1513                 check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
1514                 return;
1515             }
1516             case 8: {
1517                 check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1518                 return;
1519             }
1520 #endif
1521             }
1522         } else {
1523             auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1524                 op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
1525             };
1526
1527             auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1528                 if (mask) {
1529                     load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
1530                     if (mask)
1531                         or32(Imm32(mask), character);
1532                     op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1533                     return;
1534                 }
1535                 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
1536             };
1537
1538 #if CPU(X86_64) || CPU(ARM64)
1539             auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1540                 load64(negativeOffsetIndexedAddress(offset, character), character);
1541                 if (mask)
1542                     or64(TrustedImm64(mask), character);
1543                 op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
1544             };
1545 #endif
1546
1547             switch (numberCharacters) {
1548             case 1:
1549                 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1550                 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1551                 return;
1552             case 2:
1553                 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1554                 return;
1555 #if CPU(X86_64) || CPU(ARM64)
1556             case 3:
1557                 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1558                 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
1559                 return;
1560             case 4:
1561                 check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1562                 return;
1563 #endif
1564             }
1565         }
1566     }
1567     void backtrackPatternCharacterOnce(size_t opIndex)
1568     {
1569         backtrackTermDefault(opIndex);
1570     }
1571
1572     void generatePatternCharacterFixed(size_t opIndex)
1573     {
1574         YarrOp& op = m_ops[opIndex];
1575         PatternTerm* term = op.m_term;
1576         UChar32 ch = term->patternCharacter;
1577
1578         const RegisterID character = regT0;
1579         const RegisterID countRegister = regT1;
1580
1581         if (m_decodeSurrogatePairs)
1582             op.m_jumps.append(jumpIfNoAvailableInput());
1583
1584         move(index, countRegister);
1585         Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1586         scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
1587         sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1588
1589         Label loop(this);
1590         readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1591         // For case-insesitive compares, non-ascii characters that have different
1592         // upper & lower case representations are converted to a character class.
1593         ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1594         if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1595             or32(TrustedImm32(0x20), character);
1596             ch |= 0x20;
1597         }
1598
1599         op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
1600 #ifdef JIT_UNICODE_EXPRESSIONS
1601         if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
1602             add32(TrustedImm32(2), countRegister);
1603         else
1604 #endif
1605             add32(TrustedImm32(1), countRegister);
1606         branch32(NotEqual, countRegister, index).linkTo(loop, this);
1607     }
1608     void backtrackPatternCharacterFixed(size_t opIndex)
1609     {
1610         backtrackTermDefault(opIndex);
1611     }
1612
1613     void generatePatternCharacterGreedy(size_t opIndex)
1614     {
1615         YarrOp& op = m_ops[opIndex];
1616         PatternTerm* term = op.m_term;
1617         UChar32 ch = term->patternCharacter;
1618
1619         const RegisterID character = regT0;
1620         const RegisterID countRegister = regT1;
1621
1622         move(TrustedImm32(0), countRegister);
1623
1624         // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1625         if (!((ch > 0xff) && (m_charSize == Char8))) {
1626             JumpList failures;
1627             Label loop(this);
1628             failures.append(atEndOfInput());
1629             failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1630
1631             add32(TrustedImm32(1), index);
1632 #ifdef JIT_UNICODE_EXPRESSIONS
1633             if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1634                 Jump surrogatePairOk = notAtEndOfInput();
1635                 sub32(TrustedImm32(1), index);
1636                 failures.append(jump());
1637                 surrogatePairOk.link(this);
1638                 add32(TrustedImm32(1), index);
1639             }
1640 #endif
1641             add32(TrustedImm32(1), countRegister);
1642
1643             if (term->quantityMaxCount == quantifyInfinite)
1644                 jump(loop);
1645             else
1646                 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1647
1648             failures.link(this);
1649         }
1650         op.m_reentry = label();
1651
1652         storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1653     }
1654     void backtrackPatternCharacterGreedy(size_t opIndex)
1655     {
1656         YarrOp& op = m_ops[opIndex];
1657         PatternTerm* term = op.m_term;
1658
1659         const RegisterID countRegister = regT1;
1660
1661         m_backtrackingState.link(this);
1662
1663         loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1664         m_backtrackingState.append(branchTest32(Zero, countRegister));
1665         sub32(TrustedImm32(1), countRegister);
1666         if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter))
1667             sub32(TrustedImm32(1), index);
1668         else
1669             sub32(TrustedImm32(2), index);
1670         jump(op.m_reentry);
1671     }
1672
1673     void generatePatternCharacterNonGreedy(size_t opIndex)
1674     {
1675         YarrOp& op = m_ops[opIndex];
1676         PatternTerm* term = op.m_term;
1677
1678         const RegisterID countRegister = regT1;
1679
1680         move(TrustedImm32(0), countRegister);
1681         op.m_reentry = label();
1682         storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1683     }
1684     void backtrackPatternCharacterNonGreedy(size_t opIndex)
1685     {
1686         YarrOp& op = m_ops[opIndex];
1687         PatternTerm* term = op.m_term;
1688         UChar32 ch = term->patternCharacter;
1689
1690         const RegisterID character = regT0;
1691         const RegisterID countRegister = regT1;
1692
1693         m_backtrackingState.link(this);
1694
1695         loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1696
1697         // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1698         if (!((ch > 0xff) && (m_charSize == Char8))) {
1699             JumpList nonGreedyFailures;
1700             nonGreedyFailures.append(atEndOfInput());
1701             if (term->quantityMaxCount != quantifyInfinite)
1702                 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1703             nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1704
1705             add32(TrustedImm32(1), index);
1706 #ifdef JIT_UNICODE_EXPRESSIONS
1707             if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1708                 Jump surrogatePairOk = notAtEndOfInput();
1709                 sub32(TrustedImm32(1), index);
1710                 nonGreedyFailures.append(jump());
1711                 surrogatePairOk.link(this);
1712                 add32(TrustedImm32(1), index);
1713             }
1714 #endif
1715             add32(TrustedImm32(1), countRegister);
1716
1717             jump(op.m_reentry);
1718             nonGreedyFailures.link(this);
1719         }
1720
1721         if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1722             // subtract countRegister*2 for non-BMP characters
1723             lshift32(TrustedImm32(1), countRegister);
1724         }
1725
1726         sub32(countRegister, index);
1727         m_backtrackingState.fallthrough();
1728     }
1729
1730     void generateCharacterClassOnce(size_t opIndex)
1731     {
1732         YarrOp& op = m_ops[opIndex];
1733         PatternTerm* term = op.m_term;
1734
1735         const RegisterID character = regT0;
1736
1737         if (m_decodeSurrogatePairs) {
1738             op.m_jumps.append(jumpIfNoAvailableInput());
1739             storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1740         }
1741
1742         JumpList matchDest;
1743         readCharacter(m_checkedOffset - term->inputPosition, character);
1744         // If we are matching the "any character" builtin class we only need to read the
1745         // character and don't need to match as it will always succeed.
1746         if (term->invert() || !term->characterClass->m_anyCharacter) {
1747             matchCharacterClass(character, matchDest, term->characterClass);
1748
1749             if (term->invert())
1750                 op.m_jumps.append(matchDest);
1751             else {
1752                 op.m_jumps.append(jump());
1753                 matchDest.link(this);
1754             }
1755         }
1756 #ifdef JIT_UNICODE_EXPRESSIONS
1757         if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert())) {
1758             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1759             add32(TrustedImm32(1), index);
1760             isBMPChar.link(this);
1761         }
1762 #endif
1763     }
1764     void backtrackCharacterClassOnce(size_t opIndex)
1765     {
1766 #ifdef JIT_UNICODE_EXPRESSIONS
1767         if (m_decodeSurrogatePairs) {
1768             YarrOp& op = m_ops[opIndex];
1769             PatternTerm* term = op.m_term;
1770
1771             m_backtrackingState.link(this);
1772             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1773             m_backtrackingState.fallthrough();
1774         }
1775 #endif
1776         backtrackTermDefault(opIndex);
1777     }
1778
1779     void generateCharacterClassFixed(size_t opIndex)
1780     {
1781         YarrOp& op = m_ops[opIndex];
1782         PatternTerm* term = op.m_term;
1783
1784         const RegisterID character = regT0;
1785         const RegisterID countRegister = regT1;
1786
1787         if (m_decodeSurrogatePairs)
1788             op.m_jumps.append(jumpIfNoAvailableInput());
1789
1790         move(index, countRegister);
1791
1792         Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1793
1794 #ifdef JIT_UNICODE_EXPRESSIONS
1795         if (m_decodeSurrogatePairs && term->characterClass->hasOnlyNonBMPCharacters() && !term->invert())
1796             scaledMaxCount *= 2;
1797 #endif
1798         sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1799
1800         Label loop(this);
1801         JumpList matchDest;
1802         readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1803         // If we are matching the "any character" builtin class we only need to read the
1804         // character and don't need to match as it will always succeed.
1805         if (term->invert() || !term->characterClass->m_anyCharacter) {
1806             matchCharacterClass(character, matchDest, term->characterClass);
1807
1808             if (term->invert())
1809                 op.m_jumps.append(matchDest);
1810             else {
1811                 op.m_jumps.append(jump());
1812                 matchDest.link(this);
1813             }
1814         }
1815
1816 #ifdef JIT_UNICODE_EXPRESSIONS
1817         if (m_decodeSurrogatePairs) {
1818             if (term->characterClass->hasOneCharacterSize() && !term->invert())
1819                 add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), countRegister);
1820             else {
1821                 add32(TrustedImm32(1), countRegister);
1822                 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1823                 op.m_jumps.append(atEndOfInput());
1824                 add32(TrustedImm32(1), countRegister);
1825                 add32(TrustedImm32(1), index);
1826                 isBMPChar.link(this);
1827             }
1828         } else
1829 #endif
1830             add32(TrustedImm32(1), countRegister);
1831         branch32(NotEqual, countRegister, index).linkTo(loop, this);
1832     }
1833     void backtrackCharacterClassFixed(size_t opIndex)
1834     {
1835         backtrackTermDefault(opIndex);
1836     }
1837
1838     void generateCharacterClassGreedy(size_t opIndex)
1839     {
1840         YarrOp& op = m_ops[opIndex];
1841         PatternTerm* term = op.m_term;
1842
1843         const RegisterID character = regT0;
1844         const RegisterID countRegister = regT1;
1845
1846         if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert()))
1847             storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1848         move(TrustedImm32(0), countRegister);
1849
1850         JumpList failures;
1851         Label loop(this);
1852 #ifdef JIT_UNICODE_EXPRESSIONS
1853         if (term->characterClass->hasOneCharacterSize() && !term->invert() && term->characterClass->hasNonBMPCharacters()) {
1854             move(TrustedImm32(1), character);
1855             failures.append(checkNotEnoughInput(character));
1856         } else
1857 #endif
1858             failures.append(atEndOfInput());
1859
1860         if (term->invert()) {
1861             readCharacter(m_checkedOffset - term->inputPosition, character);
1862             matchCharacterClass(character, failures, term->characterClass);
1863         } else {
1864             JumpList matchDest;
1865             readCharacter(m_checkedOffset - term->inputPosition, character);
1866             // If we are matching the "any character" builtin class for non-unicode patterns,
1867             // we only need to read the character and don't need to match as it will always succeed.
1868             if (!term->characterClass->m_anyCharacter) {
1869                 matchCharacterClass(character, matchDest, term->characterClass);
1870                 failures.append(jump());
1871             }
1872             matchDest.link(this);
1873         }
1874
1875 #ifdef JIT_UNICODE_EXPRESSIONS
1876         if (m_decodeSurrogatePairs)
1877             advanceIndexAfterCharacterClassTermMatch(term, failures, character);
1878         else
1879 #endif
1880             add32(TrustedImm32(1), index);
1881         add32(TrustedImm32(1), countRegister);
1882
1883         if (term->quantityMaxCount != quantifyInfinite) {
1884             branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1885             failures.append(jump());
1886         } else
1887             jump(loop);
1888
1889         failures.link(this);
1890         op.m_reentry = label();
1891
1892         storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1893     }
1894     void backtrackCharacterClassGreedy(size_t opIndex)
1895     {
1896         YarrOp& op = m_ops[opIndex];
1897         PatternTerm* term = op.m_term;
1898
1899         const RegisterID countRegister = regT1;
1900
1901         m_backtrackingState.link(this);
1902
1903         loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1904         m_backtrackingState.append(branchTest32(Zero, countRegister));
1905         sub32(TrustedImm32(1), countRegister);
1906         storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1907
1908         if (!m_decodeSurrogatePairs)
1909             sub32(TrustedImm32(1), index);
1910         else if (term->characterClass->hasOneCharacterSize() && !term->invert())
1911             sub32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
1912         else {
1913             // Rematch one less
1914             const RegisterID character = regT0;
1915
1916             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1917
1918             Label rematchLoop(this);
1919             readCharacter(m_checkedOffset - term->inputPosition, character);
1920
1921             sub32(TrustedImm32(1), countRegister);
1922             add32(TrustedImm32(1), index);
1923
1924 #ifdef JIT_UNICODE_EXPRESSIONS
1925             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1926             add32(TrustedImm32(1), index);
1927             isBMPChar.link(this);
1928 #endif
1929
1930             branchTest32(Zero, countRegister).linkTo(rematchLoop, this);
1931
1932             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1933         }
1934         jump(op.m_reentry);
1935     }
1936
1937     void generateCharacterClassNonGreedy(size_t opIndex)
1938     {
1939         YarrOp& op = m_ops[opIndex];
1940         PatternTerm* term = op.m_term;
1941
1942         const RegisterID countRegister = regT1;
1943
1944         move(TrustedImm32(0), countRegister);
1945         op.m_reentry = label();
1946
1947 #ifdef JIT_UNICODE_EXPRESSIONS
1948         if (m_decodeSurrogatePairs) {
1949             if (!term->characterClass->hasOneCharacterSize() || term->invert())
1950                 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1951         }
1952 #endif
1953
1954         storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1955     }
1956
1957     void backtrackCharacterClassNonGreedy(size_t opIndex)
1958     {
1959         YarrOp& op = m_ops[opIndex];
1960         PatternTerm* term = op.m_term;
1961
1962         const RegisterID character = regT0;
1963         const RegisterID countRegister = regT1;
1964
1965         JumpList nonGreedyFailures;
1966
1967         m_backtrackingState.link(this);
1968
1969 #ifdef JIT_UNICODE_EXPRESSIONS
1970         if (m_decodeSurrogatePairs) {
1971             if (!term->characterClass->hasOneCharacterSize() || term->invert())
1972                 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1973         }
1974 #endif
1975
1976         loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1977
1978         nonGreedyFailures.append(atEndOfInput());
1979         nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1980
1981         JumpList matchDest;
1982         readCharacter(m_checkedOffset - term->inputPosition, character);
1983         // If we are matching the "any character" builtin class for non-unicode patterns,
1984         // we only need to read the character and don't need to match as it will always succeed.
1985         if (term->invert() || !term->characterClass->m_anyCharacter) {
1986             matchCharacterClass(character, matchDest, term->characterClass);
1987
1988             if (term->invert())
1989                 nonGreedyFailures.append(matchDest);
1990             else {
1991                 nonGreedyFailures.append(jump());
1992                 matchDest.link(this);
1993             }
1994         }
1995
1996 #ifdef JIT_UNICODE_EXPRESSIONS
1997         if (m_decodeSurrogatePairs)
1998             advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailures, character);
1999         else
2000 #endif
2001             add32(TrustedImm32(1), index);
2002         add32(TrustedImm32(1), countRegister);
2003
2004         jump(op.m_reentry);
2005
2006         nonGreedyFailures.link(this);
2007         sub32(countRegister, index);
2008         m_backtrackingState.fallthrough();
2009     }
2010
2011     void generateDotStarEnclosure(size_t opIndex)
2012     {
2013         YarrOp& op = m_ops[opIndex];
2014         PatternTerm* term = op.m_term;
2015
2016         const RegisterID character = regT0;
2017         const RegisterID matchPos = regT1;
2018 #ifndef HAVE_INITIAL_START_REG
2019         const RegisterID initialStart = character;
2020 #endif
2021
2022         JumpList foundBeginningNewLine;
2023         JumpList saveStartIndex;
2024         JumpList foundEndingNewLine;
2025
2026         if (m_pattern.dotAll()) {
2027             move(TrustedImm32(0), matchPos);
2028             setMatchStart(matchPos);
2029             move(length, index);
2030             return;
2031         }
2032
2033         ASSERT(!m_pattern.m_body->m_hasFixedSize);
2034         getMatchStart(matchPos);
2035
2036 #ifndef HAVE_INITIAL_START_REG
2037         loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2038 #endif
2039         saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
2040         Label findBOLLoop(this);
2041         sub32(TrustedImm32(1), matchPos);
2042         if (m_charSize == Char8)
2043             load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2044         else
2045             load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
2046         matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
2047
2048 #ifndef HAVE_INITIAL_START_REG
2049         loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2050 #endif
2051         branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
2052         saveStartIndex.append(jump());
2053
2054         foundBeginningNewLine.link(this);
2055         add32(TrustedImm32(1), matchPos); // Advance past newline
2056         saveStartIndex.link(this);
2057
2058         if (!m_pattern.multiline() && term->anchors.bolAnchor)
2059             op.m_jumps.append(branchTest32(NonZero, matchPos));
2060
2061         ASSERT(!m_pattern.m_body->m_hasFixedSize);
2062         setMatchStart(matchPos);
2063
2064         move(index, matchPos);
2065
2066         Label findEOLLoop(this);        
2067         foundEndingNewLine.append(branch32(Equal, matchPos, length));
2068         if (m_charSize == Char8)
2069             load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2070         else
2071             load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
2072         matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
2073         add32(TrustedImm32(1), matchPos);
2074         jump(findEOLLoop);
2075
2076         foundEndingNewLine.link(this);
2077
2078         if (!m_pattern.multiline() && term->anchors.eolAnchor)
2079             op.m_jumps.append(branch32(NotEqual, matchPos, length));
2080
2081         move(matchPos, index);
2082     }
2083
2084     void backtrackDotStarEnclosure(size_t opIndex)
2085     {
2086         backtrackTermDefault(opIndex);
2087     }
2088     
2089     // Code generation/backtracking for simple terms
2090     // (pattern characters, character classes, and assertions).
2091     // These methods farm out work to the set of functions above.
2092     void generateTerm(size_t opIndex)
2093     {
2094         YarrOp& op = m_ops[opIndex];
2095         PatternTerm* term = op.m_term;
2096
2097         switch (term->type) {
2098         case PatternTerm::TypePatternCharacter:
2099             switch (term->quantityType) {
2100             case QuantifierFixedCount:
2101                 if (term->quantityMaxCount == 1)
2102                     generatePatternCharacterOnce(opIndex);
2103                 else
2104                     generatePatternCharacterFixed(opIndex);
2105                 break;
2106             case QuantifierGreedy:
2107                 generatePatternCharacterGreedy(opIndex);
2108                 break;
2109             case QuantifierNonGreedy:
2110                 generatePatternCharacterNonGreedy(opIndex);
2111                 break;
2112             }
2113             break;
2114
2115         case PatternTerm::TypeCharacterClass:
2116             switch (term->quantityType) {
2117             case QuantifierFixedCount:
2118                 if (term->quantityMaxCount == 1)
2119                     generateCharacterClassOnce(opIndex);
2120                 else
2121                     generateCharacterClassFixed(opIndex);
2122                 break;
2123             case QuantifierGreedy:
2124                 generateCharacterClassGreedy(opIndex);
2125                 break;
2126             case QuantifierNonGreedy:
2127                 generateCharacterClassNonGreedy(opIndex);
2128                 break;
2129             }
2130             break;
2131
2132         case PatternTerm::TypeAssertionBOL:
2133             generateAssertionBOL(opIndex);
2134             break;
2135
2136         case PatternTerm::TypeAssertionEOL:
2137             generateAssertionEOL(opIndex);
2138             break;
2139
2140         case PatternTerm::TypeAssertionWordBoundary:
2141             generateAssertionWordBoundary(opIndex);
2142             break;
2143
2144         case PatternTerm::TypeForwardReference:
2145             m_failureReason = JITFailureReason::ForwardReference;
2146             break;
2147
2148         case PatternTerm::TypeParenthesesSubpattern:
2149         case PatternTerm::TypeParentheticalAssertion:
2150             RELEASE_ASSERT_NOT_REACHED();
2151
2152         case PatternTerm::TypeBackReference:
2153 #if ENABLE(YARR_JIT_BACKREFERENCES)
2154             generateBackReference(opIndex);
2155 #else
2156             m_failureReason = JITFailureReason::BackReference;
2157 #endif
2158             break;
2159         case PatternTerm::TypeDotStarEnclosure:
2160             generateDotStarEnclosure(opIndex);
2161             break;
2162         }
2163     }
2164     void backtrackTerm(size_t opIndex)
2165     {
2166         YarrOp& op = m_ops[opIndex];
2167         PatternTerm* term = op.m_term;
2168
2169         switch (term->type) {
2170         case PatternTerm::TypePatternCharacter:
2171             switch (term->quantityType) {
2172             case QuantifierFixedCount:
2173                 if (term->quantityMaxCount == 1)
2174                     backtrackPatternCharacterOnce(opIndex);
2175                 else
2176                     backtrackPatternCharacterFixed(opIndex);
2177                 break;
2178             case QuantifierGreedy:
2179                 backtrackPatternCharacterGreedy(opIndex);
2180                 break;
2181             case QuantifierNonGreedy:
2182                 backtrackPatternCharacterNonGreedy(opIndex);
2183                 break;
2184             }
2185             break;
2186
2187         case PatternTerm::TypeCharacterClass:
2188             switch (term->quantityType) {
2189             case QuantifierFixedCount:
2190                 if (term->quantityMaxCount == 1)
2191                     backtrackCharacterClassOnce(opIndex);
2192                 else
2193                     backtrackCharacterClassFixed(opIndex);
2194                 break;
2195             case QuantifierGreedy:
2196                 backtrackCharacterClassGreedy(opIndex);
2197                 break;
2198             case QuantifierNonGreedy:
2199                 backtrackCharacterClassNonGreedy(opIndex);
2200                 break;
2201             }
2202             break;
2203
2204         case PatternTerm::TypeAssertionBOL:
2205             backtrackAssertionBOL(opIndex);
2206             break;
2207
2208         case PatternTerm::TypeAssertionEOL:
2209             backtrackAssertionEOL(opIndex);
2210             break;
2211
2212         case PatternTerm::TypeAssertionWordBoundary:
2213             backtrackAssertionWordBoundary(opIndex);
2214             break;
2215
2216         case PatternTerm::TypeForwardReference:
2217             m_failureReason = JITFailureReason::ForwardReference;
2218             break;
2219
2220         case PatternTerm::TypeParenthesesSubpattern:
2221         case PatternTerm::TypeParentheticalAssertion:
2222             RELEASE_ASSERT_NOT_REACHED();
2223
2224         case PatternTerm::TypeBackReference:
2225 #if ENABLE(YARR_JIT_BACKREFERENCES)
2226             backtrackBackReference(opIndex);
2227 #else
2228             m_failureReason = JITFailureReason::BackReference;
2229 #endif
2230             break;
2231
2232         case PatternTerm::TypeDotStarEnclosure:
2233             backtrackDotStarEnclosure(opIndex);
2234             break;
2235         }
2236     }
2237
2238     void generate()
2239     {
2240         // Forwards generate the matching code.
2241         ASSERT(m_ops.size());
2242         size_t opIndex = 0;
2243
2244         do {
2245             if (m_disassembler)
2246                 m_disassembler->setForGenerate(opIndex, label());
2247
2248             YarrOp& op = m_ops[opIndex];
2249             switch (op.m_op) {
2250
2251             case OpTerm:
2252                 generateTerm(opIndex);
2253                 break;
2254
2255             // OpBodyAlternativeBegin/Next/End
2256             //
2257             // These nodes wrap the set of alternatives in the body of the regular expression.
2258             // There may be either one or two chains of OpBodyAlternative nodes, one representing
2259             // the 'once through' sequence of alternatives (if any exist), and one representing
2260             // the repeating alternatives (again, if any exist).
2261             //
2262             // Upon normal entry to the Begin alternative, we will check that input is available.
2263             // Reentry to the Begin alternative will take place after the check has taken place,
2264             // and will assume that the input position has already been progressed as appropriate.
2265             //
2266             // Entry to subsequent Next/End alternatives occurs when the prior alternative has
2267             // successfully completed a match - return a success state from JIT code.
2268             //
2269             // Next alternatives allow for reentry optimized to suit backtracking from its
2270             // preceding alternative. It expects the input position to still be set to a position
2271             // appropriate to its predecessor, and it will only perform an input check if the
2272             // predecessor had a minimum size less than its own.
2273             //
2274             // In the case 'once through' expressions, the End node will also have a reentry
2275             // point to jump to when the last alternative fails. Again, this expects the input
2276             // position to still reflect that expected by the prior alternative.
2277             case OpBodyAlternativeBegin: {
2278                 PatternAlternative* alternative = op.m_alternative;
2279
2280                 // Upon entry at the head of the set of alternatives, check if input is available
2281                 // to run the first alternative. (This progresses the input position).
2282                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
2283                 // We will reenter after the check, and assume the input position to have been
2284                 // set as appropriate to this alternative.
2285                 op.m_reentry = label();
2286
2287                 m_checkedOffset += alternative->m_minimumSize;
2288                 break;
2289             }
2290             case OpBodyAlternativeNext:
2291             case OpBodyAlternativeEnd: {
2292                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2293                 PatternAlternative* alternative = op.m_alternative;
2294
2295                 // If we get here, the prior alternative matched - return success.
2296                 
2297                 // Adjust the stack pointer to remove the pattern's frame.
2298                 removeCallFrame();
2299
2300                 // Load appropriate values into the return register and the first output
2301                 // slot, and return. In the case of pattern with a fixed size, we will
2302                 // not have yet set the value in the first 
2303                 ASSERT(index != returnRegister);
2304                 if (m_pattern.m_body->m_hasFixedSize) {
2305                     move(index, returnRegister);
2306                     if (priorAlternative->m_minimumSize)
2307                         sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
2308                     if (compileMode == IncludeSubpatterns)
2309                         store32(returnRegister, output);
2310                 } else
2311                     getMatchStart(returnRegister);
2312                 if (compileMode == IncludeSubpatterns)
2313                     store32(index, Address(output, 4));
2314                 move(index, returnRegister2);
2315
2316                 generateReturn();
2317
2318                 // This is the divide between the tail of the prior alternative, above, and
2319                 // the head of the subsequent alternative, below.
2320
2321                 if (op.m_op == OpBodyAlternativeNext) {
2322                     // This is the reentry point for the Next alternative. We expect any code
2323                     // that jumps here to do so with the input position matching that of the
2324                     // PRIOR alteranative, and we will only check input availability if we
2325                     // need to progress it forwards.
2326                     op.m_reentry = label();
2327                     if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
2328                         add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
2329                         op.m_jumps.append(jumpIfNoAvailableInput());
2330                     } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
2331                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
2332                 } else if (op.m_nextOp == notFound) {
2333                     // This is the reentry point for the End of 'once through' alternatives,
2334                     // jumped to when the last alternative fails to match.
2335                     op.m_reentry = label();
2336                     sub32(Imm32(priorAlternative->m_minimumSize), index);
2337                 }
2338
2339                 if (op.m_op == OpBodyAlternativeNext)
2340                     m_checkedOffset += alternative->m_minimumSize;
2341                 m_checkedOffset -= priorAlternative->m_minimumSize;
2342                 break;
2343             }
2344
2345             // OpSimpleNestedAlternativeBegin/Next/End
2346             // OpNestedAlternativeBegin/Next/End
2347             //
2348             // These nodes are used to handle sets of alternatives that are nested within
2349             // subpatterns and parenthetical assertions. The 'simple' forms are used where
2350             // we do not need to be able to backtrack back into any alternative other than
2351             // the last, the normal forms allow backtracking into any alternative.
2352             //
2353             // Each Begin/Next node is responsible for planting an input check to ensure
2354             // sufficient input is available on entry. Next nodes additionally need to
2355             // jump to the end - Next nodes use the End node's m_jumps list to hold this
2356             // set of jumps.
2357             //
2358             // In the non-simple forms, successful alternative matches must store a
2359             // 'return address' using a DataLabelPtr, used to store the address to jump
2360             // to when backtracking, to get to the code for the appropriate alternative.
2361             case OpSimpleNestedAlternativeBegin:
2362             case OpNestedAlternativeBegin: {
2363                 PatternTerm* term = op.m_term;
2364                 PatternAlternative* alternative = op.m_alternative;
2365                 PatternDisjunction* disjunction = term->parentheses.disjunction;
2366
2367                 // Calculate how much input we need to check for, and if non-zero check.
2368                 op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
2369                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2370                     op.m_checkAdjust -= disjunction->m_minimumSize;
2371                 if (op.m_checkAdjust)
2372                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2373
2374                 m_checkedOffset += op.m_checkAdjust;
2375                 break;
2376             }
2377             case OpSimpleNestedAlternativeNext:
2378             case OpNestedAlternativeNext: {
2379                 PatternTerm* term = op.m_term;
2380                 PatternAlternative* alternative = op.m_alternative;
2381                 PatternDisjunction* disjunction = term->parentheses.disjunction;
2382
2383                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2384                 if (op.m_op == OpNestedAlternativeNext) {
2385                     unsigned parenthesesFrameLocation = term->frameLocation;
2386                     op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2387                 }
2388
2389                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2390                     // If the previous alternative matched without consuming characters then
2391                     // backtrack to try to match while consumming some input.
2392                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2393                 }
2394
2395                 // If we reach here then the last alternative has matched - jump to the
2396                 // End node, to skip over any further alternatives.
2397                 //
2398                 // FIXME: this is logically O(N^2) (though N can be expected to be very
2399                 // small). We could avoid this either by adding an extra jump to the JIT
2400                 // data structures, or by making backtracking code that jumps to Next
2401                 // alternatives are responsible for checking that input is available (if
2402                 // we didn't need to plant the input checks, then m_jumps would be free).
2403                 YarrOp* endOp = &m_ops[op.m_nextOp];
2404                 while (endOp->m_nextOp != notFound) {
2405                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2406                     endOp = &m_ops[endOp->m_nextOp];
2407                 }
2408                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2409                 endOp->m_jumps.append(jump());
2410
2411                 // This is the entry point for the next alternative.
2412                 op.m_reentry = label();
2413
2414                 // Calculate how much input we need to check for, and if non-zero check.
2415                 op.m_checkAdjust = alternative->m_minimumSize;
2416                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2417                     op.m_checkAdjust -= disjunction->m_minimumSize;
2418                 if (op.m_checkAdjust)
2419                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2420
2421                 YarrOp& lastOp = m_ops[op.m_previousOp];
2422                 m_checkedOffset -= lastOp.m_checkAdjust;
2423                 m_checkedOffset += op.m_checkAdjust;
2424                 break;
2425             }
2426             case OpSimpleNestedAlternativeEnd:
2427             case OpNestedAlternativeEnd: {
2428                 PatternTerm* term = op.m_term;
2429
2430                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2431                 if (op.m_op == OpNestedAlternativeEnd) {
2432                     unsigned parenthesesFrameLocation = term->frameLocation;
2433                     op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2434                 }
2435
2436                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2437                     // If the previous alternative matched without consuming characters then
2438                     // backtrack to try to match while consumming some input.
2439                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2440                 }
2441
2442                 // If this set of alternatives contains more than one alternative,
2443                 // then the Next nodes will have planted jumps to the End, and added
2444                 // them to this node's m_jumps list.
2445                 op.m_jumps.link(this);
2446                 op.m_jumps.clear();
2447
2448                 YarrOp& lastOp = m_ops[op.m_previousOp];
2449                 m_checkedOffset -= lastOp.m_checkAdjust;
2450                 break;
2451             }
2452
2453             // OpParenthesesSubpatternOnceBegin/End
2454             //
2455             // These nodes support (optionally) capturing subpatterns, that have a
2456             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 
2457             case OpParenthesesSubpatternOnceBegin: {
2458                 PatternTerm* term = op.m_term;
2459                 unsigned parenthesesFrameLocation = term->frameLocation;
2460                 const RegisterID indexTemporary = regT0;
2461                 ASSERT(term->quantityMaxCount == 1);
2462
2463                 // Upon entry to a Greedy quantified set of parenthese store the index.
2464                 // We'll use this for two purposes:
2465                 //  - To indicate which iteration we are on of mathing the remainder of
2466                 //    the expression after the parentheses - the first, including the
2467                 //    match within the parentheses, or the second having skipped over them.
2468                 //  - To check for empty matches, which must be rejected.
2469                 //
2470                 // At the head of a NonGreedy set of parentheses we'll immediately set the
2471                 // value on the stack to -1 (indicating a match skipping the subpattern),
2472                 // and plant a jump to the end. We'll also plant a label to backtrack to
2473                 // to reenter the subpattern later, with a store to set up index on the
2474                 // second iteration.
2475                 //
2476                 // FIXME: for capturing parens, could use the index in the capture array?
2477                 if (term->quantityType == QuantifierGreedy)
2478                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2479                 else if (term->quantityType == QuantifierNonGreedy) {
2480                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2481                     op.m_jumps.append(jump());
2482                     op.m_reentry = label();
2483                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2484                 }
2485
2486                 // If the parenthese are capturing, store the starting index value to the
2487                 // captures array, offsetting as necessary.
2488                 //
2489                 // FIXME: could avoid offsetting this value in JIT code, apply
2490                 // offsets only afterwards, at the point the results array is
2491                 // being accessed.
2492                 if (term->capture() && compileMode == IncludeSubpatterns) {
2493                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2494                     if (term->quantityType == QuantifierFixedCount)
2495                         inputOffset += term->parentheses.disjunction->m_minimumSize;
2496                     if (inputOffset) {
2497                         move(index, indexTemporary);
2498                         sub32(Imm32(inputOffset), indexTemporary);
2499                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2500                     } else
2501                         setSubpatternStart(index, term->parentheses.subpatternId);
2502                 }
2503                 break;
2504             }
2505             case OpParenthesesSubpatternOnceEnd: {
2506                 PatternTerm* term = op.m_term;
2507                 const RegisterID indexTemporary = regT0;
2508                 ASSERT(term->quantityMaxCount == 1);
2509
2510                 // Runtime ASSERT to make sure that the nested alternative handled the
2511                 // "no input consumed" check.
2512                 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2513                     Jump pastBreakpoint;
2514                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2515                     abortWithReason(YARRNoInputConsumed);
2516                     pastBreakpoint.link(this);
2517                 }
2518
2519                 // If the parenthese are capturing, store the ending index value to the
2520                 // captures array, offsetting as necessary.
2521                 //
2522                 // FIXME: could avoid offsetting this value in JIT code, apply
2523                 // offsets only afterwards, at the point the results array is
2524                 // being accessed.
2525                 if (term->capture() && compileMode == IncludeSubpatterns) {
2526                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2527                     if (inputOffset) {
2528                         move(index, indexTemporary);
2529                         sub32(Imm32(inputOffset), indexTemporary);
2530                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2531                     } else
2532                         setSubpatternEnd(index, term->parentheses.subpatternId);
2533                 }
2534
2535                 // If the parentheses are quantified Greedy then add a label to jump back
2536                 // to if we get a failed match from after the parentheses. For NonGreedy
2537                 // parentheses, link the jump from before the subpattern to here.
2538                 if (term->quantityType == QuantifierGreedy)
2539                     op.m_reentry = label();
2540                 else if (term->quantityType == QuantifierNonGreedy) {
2541                     YarrOp& beginOp = m_ops[op.m_previousOp];
2542                     beginOp.m_jumps.link(this);
2543                 }
2544                 break;
2545             }
2546
2547             // OpParenthesesSubpatternTerminalBegin/End
2548             case OpParenthesesSubpatternTerminalBegin: {
2549                 PatternTerm* term = op.m_term;
2550                 ASSERT(term->quantityType == QuantifierGreedy);
2551                 ASSERT(term->quantityMaxCount == quantifyInfinite);
2552                 ASSERT(!term->capture());
2553
2554                 // Upon entry set a label to loop back to.
2555                 op.m_reentry = label();
2556
2557                 // Store the start index of the current match; we need to reject zero
2558                 // length matches.
2559                 storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2560                 break;
2561             }
2562             case OpParenthesesSubpatternTerminalEnd: {
2563                 YarrOp& beginOp = m_ops[op.m_previousOp];
2564                 if (!ASSERT_DISABLED) {
2565                     PatternTerm* term = op.m_term;
2566                     
2567                     // Runtime ASSERT to make sure that the nested alternative handled the
2568                     // "no input consumed" check.
2569                     Jump pastBreakpoint;
2570                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2571                     abortWithReason(YARRNoInputConsumed);
2572                     pastBreakpoint.link(this);
2573                 }
2574
2575                 // We know that the match is non-zero, we can accept it and
2576                 // loop back up to the head of the subpattern.
2577                 jump(beginOp.m_reentry);
2578
2579                 // This is the entry point to jump to when we stop matching - we will
2580                 // do so once the subpattern cannot match any more.
2581                 op.m_reentry = label();
2582                 break;
2583             }
2584
2585             // OpParenthesesSubpatternBegin/End
2586             //
2587             // These nodes support generic subpatterns.
2588             case OpParenthesesSubpatternBegin: {
2589 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2590                 PatternTerm* term = op.m_term;
2591                 unsigned parenthesesFrameLocation = term->frameLocation;
2592
2593                 // Upon entry to a Greedy quantified set of parenthese store the index.
2594                 // We'll use this for two purposes:
2595                 //  - To indicate which iteration we are on of mathing the remainder of
2596                 //    the expression after the parentheses - the first, including the
2597                 //    match within the parentheses, or the second having skipped over them.
2598                 //  - To check for empty matches, which must be rejected.
2599                 //
2600                 // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
2601                 // in the backtrack info to -1 (indicating a match skipping the subpattern),
2602                 // and plant a jump to the end. We'll also plant a label to backtrack to
2603                 // to reenter the subpattern later, with a store to set 'begin' to current index
2604                 // on the second iteration.
2605                 //
2606                 // FIXME: for capturing parens, could use the index in the capture array?
2607                 if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
2608                     storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2609                     storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2610
2611                     if (term->quantityType == QuantifierNonGreedy) {
2612                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2613                         op.m_jumps.append(jump());
2614                     }
2615                     
2616                     op.m_reentry = label();
2617                     RegisterID currParenContextReg = regT0;
2618                     RegisterID newParenContextReg = regT1;
2619
2620                     loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
2621                     allocateParenContext(newParenContextReg);
2622                     storePtr(currParenContextReg, newParenContextReg);
2623                     storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2624                     saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
2625                     storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2626                 }
2627
2628                 // If the parenthese are capturing, store the starting index value to the
2629                 // captures array, offsetting as necessary.
2630                 //
2631                 // FIXME: could avoid offsetting this value in JIT code, apply
2632                 // offsets only afterwards, at the point the results array is
2633                 // being accessed.
2634                 if (term->capture() && compileMode == IncludeSubpatterns) {
2635                     const RegisterID indexTemporary = regT0;
2636                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2637                     if (term->quantityType == QuantifierFixedCount)
2638                         inputOffset += term->parentheses.disjunction->m_minimumSize;
2639                     if (inputOffset) {
2640                         move(index, indexTemporary);
2641                         sub32(Imm32(inputOffset), indexTemporary);
2642                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2643                     } else
2644                         setSubpatternStart(index, term->parentheses.subpatternId);
2645                 }
2646 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2647                 RELEASE_ASSERT_NOT_REACHED();
2648 #endif
2649                 break;
2650             }
2651             case OpParenthesesSubpatternEnd: {
2652 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2653                 PatternTerm* term = op.m_term;
2654                 unsigned parenthesesFrameLocation = term->frameLocation;
2655
2656                 // Runtime ASSERT to make sure that the nested alternative handled the
2657                 // "no input consumed" check.
2658                 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2659                     Jump pastBreakpoint;
2660                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
2661                     abortWithReason(YARRNoInputConsumed);
2662                     pastBreakpoint.link(this);
2663                 }
2664
2665                 const RegisterID countTemporary = regT1;
2666
2667                 YarrOp& beginOp = m_ops[op.m_previousOp];
2668                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2669                 add32(TrustedImm32(1), countTemporary);
2670                 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2671
2672                 // If the parenthese are capturing, store the ending index value to the
2673                 // captures array, offsetting as necessary.
2674                 //
2675                 // FIXME: could avoid offsetting this value in JIT code, apply
2676                 // offsets only afterwards, at the point the results array is
2677                 // being accessed.
2678                 if (term->capture() && compileMode == IncludeSubpatterns) {
2679                     const RegisterID indexTemporary = regT0;
2680                     
2681                     unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2682                     if (inputOffset) {
2683                         move(index, indexTemporary);
2684                         sub32(Imm32(inputOffset), indexTemporary);
2685                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2686                     } else
2687                         setSubpatternEnd(index, term->parentheses.subpatternId);
2688                 }
2689
2690                 // If the parentheses are quantified Greedy then add a label to jump back
2691                 // to if we get a failed match from after the parentheses. For NonGreedy
2692                 // parentheses, link the jump from before the subpattern to here.
2693                 if (term->quantityType == QuantifierGreedy) {
2694                     if (term->quantityMaxCount != quantifyInfinite)
2695                         branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2696                     else
2697                         jump(beginOp.m_reentry);
2698                     
2699                     op.m_reentry = label();
2700                 } else if (term->quantityType == QuantifierNonGreedy) {
2701                     YarrOp& beginOp = m_ops[op.m_previousOp];
2702                     beginOp.m_jumps.link(this);
2703                     op.m_reentry = label();
2704                 }
2705 #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2706                 RELEASE_ASSERT_NOT_REACHED();
2707 #endif
2708                 break;
2709             }
2710
2711             // OpParentheticalAssertionBegin/End
2712             case OpParentheticalAssertionBegin: {
2713                 PatternTerm* term = op.m_term;
2714
2715                 // Store the current index - assertions should not update index, so
2716                 // we will need to restore it upon a successful match.
2717                 unsigned parenthesesFrameLocation = term->frameLocation;
2718                 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2719
2720                 // Check 
2721                 op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2722                 if (op.m_checkAdjust)
2723                     sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2724
2725                 m_checkedOffset -= op.m_checkAdjust;
2726                 break;
2727             }
2728             case OpParentheticalAssertionEnd: {
2729                 PatternTerm* term = op.m_term;
2730
2731                 // Restore the input index value.
2732                 unsigned parenthesesFrameLocation = term->frameLocation;
2733                 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index);
2734
2735                 // If inverted, a successful match of the assertion must be treated
2736                 // as a failure, so jump to backtracking.
2737                 if (term->invert()) {
2738                     op.m_jumps.append(jump());
2739                     op.m_reentry = label();
2740                 }
2741
2742                 YarrOp& lastOp = m_ops[op.m_previousOp];
2743                 m_checkedOffset += lastOp.m_checkAdjust;
2744                 break;
2745             }
2746
2747             case OpMatchFailed:
2748                 removeCallFrame();
2749                 generateFailReturn();
2750                 break;
2751             }
2752
2753             ++opIndex;
2754         } while (opIndex < m_ops.size());
2755     }
2756
2757     void backtrack()
2758     {
2759         // Backwards generate the backtracking code.
2760         size_t opIndex = m_ops.size();
2761         ASSERT(opIndex);
2762
2763         do {
2764             --opIndex;
2765
2766             if (m_disassembler)
2767                 m_disassembler->setForBacktrack(opIndex, label());
2768
2769             YarrOp& op = m_ops[opIndex];
2770             switch (op.m_op) {
2771
2772             case OpTerm:
2773                 backtrackTerm(opIndex);
2774                 break;
2775
2776             // OpBodyAlternativeBegin/Next/End
2777             //
2778             // For each Begin/Next node representing an alternative, we need to decide what to do
2779             // in two circumstances:
2780             //  - If we backtrack back into this node, from within the alternative.
2781             //  - If the input check at the head of the alternative fails (if this exists).
2782             //
2783             // We treat these two cases differently since in the former case we have slightly
2784             // more information - since we are backtracking out of a prior alternative we know
2785             // that at least enough input was available to run it. For example, given the regular
2786             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
2787             // character match of 'a'), then we need not perform an additional input availability
2788             // check before running the second alternative.
2789             //
2790             // Backtracking required differs for the last alternative, which in the case of the
2791             // repeating set of alternatives must loop. The code generated for the last alternative
2792             // will also be used to handle all input check failures from any prior alternatives -
2793             // these require similar functionality, in seeking the next available alternative for
2794             // which there is sufficient input.
2795             //
2796             // Since backtracking of all other alternatives simply requires us to link backtracks
2797             // to the reentry point for the subsequent alternative, we will only be generating any
2798             // code when backtracking the last alternative.
2799             case OpBodyAlternativeBegin:
2800             case OpBodyAlternativeNext: {
2801                 PatternAlternative* alternative = op.m_alternative;
2802
2803                 if (op.m_op == OpBodyAlternativeNext) {
2804                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2805                     m_checkedOffset += priorAlternative->m_minimumSize;
2806                 }
2807                 m_checkedOffset -= alternative->m_minimumSize;
2808
2809                 // Is this the last alternative? If not, then if we backtrack to this point we just
2810                 // need to jump to try to match the next alternative.
2811                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2812                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2813                     break;
2814                 }
2815                 YarrOp& endOp = m_ops[op.m_nextOp];
2816
2817                 YarrOp* beginOp = &op;
2818                 while (beginOp->m_op != OpBodyAlternativeBegin) {
2819                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2820                     beginOp = &m_ops[beginOp->m_previousOp];
2821                 }
2822
2823                 bool onceThrough = endOp.m_nextOp == notFound;
2824                 
2825                 JumpList lastStickyAlternativeFailures;
2826
2827                 // First, generate code to handle cases where we backtrack out of an attempted match
2828                 // of the last alternative. If this is a 'once through' set of alternatives then we
2829                 // have nothing to do - link this straight through to the End.
2830                 if (onceThrough)
2831                     m_backtrackingState.linkTo(endOp.m_reentry, this);
2832                 else {
2833                     // If we don't need to move the input poistion, and the pattern has a fixed size
2834                     // (in which case we omit the store of the start index until the pattern has matched)
2835                     // then we can just link the backtrack out of the last alternative straight to the
2836                     // head of the first alternative.
2837                     if (m_pattern.m_body->m_hasFixedSize
2838                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2839                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
2840                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
2841                     else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2842                         // It is a sticky pattern and the last alternative failed, jump to the end.
2843                         m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2844                     } else {
2845                         // We need to generate a trampoline of code to execute before looping back
2846                         // around to the first alternative.
2847                         m_backtrackingState.link(this);
2848
2849                         // No need to advance and retry for a sticky pattern.
2850                         if (!m_pattern.sticky()) {
2851                             // If the pattern size is not fixed, then store the start index for use if we match.
2852                             if (!m_pattern.m_body->m_hasFixedSize) {
2853                                 if (alternative->m_minimumSize == 1)
2854                                     setMatchStart(index);
2855                                 else {
2856                                     move(index, regT0);
2857                                     if (alternative->m_minimumSize)
2858                                         sub32(Imm32(alternative->m_minimumSize - 1), regT0);
2859                                     else
2860                                         add32(TrustedImm32(1), regT0);
2861                                     setMatchStart(regT0);
2862                                 }
2863                             }
2864
2865                             // Generate code to loop. Check whether the last alternative is longer than the
2866                             // first (e.g. /a|xy/ or /a|xyz/).
2867                             if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2868                                 // We want to loop, and increment input position. If the delta is 1, it is
2869                                 // already correctly incremented, if more than one then decrement as appropriate.
2870                                 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2871                                 ASSERT(delta);
2872                                 if (delta != 1)
2873                                     sub32(Imm32(delta - 1), index);
2874                                 jump(beginOp->m_reentry);
2875                             } else {
2876                                 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
2877                                 // be sufficent input available to handle this, so just fall through.
2878                                 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2879                                 if (delta != 0xFFFFFFFFu) {
2880                                     // We need to check input because we are incrementing the input.
2881                                     add32(Imm32(delta + 1), index);
2882                                     checkInput().linkTo(beginOp->m_reentry, this);
2883                                 }
2884                             }
2885                         }
2886                     }
2887                 }
2888
2889                 // We can reach this point in the code in two ways:
2890                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
2891                 //    last alternative, and did not have sufficent input to run the first).
2892                 //  - We will loop back up to the following label when a repeating alternative loops,
2893                 //    following a failed input check.
2894                 //
2895                 // Either way, we have just failed the input check for the first alternative.
2896                 Label firstInputCheckFailed(this);
2897
2898                 // Generate code to handle input check failures from alternatives except the last.
2899                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
2900                 // nextOp is the alternative we will be attempting to reenter into.
2901                 // 
2902                 // We will link input check failures from the forwards matching path back to the code
2903                 // that can handle them.
2904                 YarrOp* prevOp = beginOp;
2905                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
2906                 while (nextOp->m_op != OpBodyAlternativeEnd) {
2907                     prevOp->m_jumps.link(this);
2908
2909                     // We only get here if an input check fails, it is only worth checking again
2910                     // if the next alternative has a minimum size less than the last.
2911                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
2912                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
2913                         // subtract delta back out, and reduce this code. Should performance test
2914                         // the benefit of this.
2915                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
2916                         sub32(Imm32(delta), index);
2917                         Jump fail = jumpIfNoAvailableInput();
2918                         add32(Imm32(delta), index);
2919                         jump(nextOp->m_reentry);
2920                         fail.link(this);
2921                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
2922                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
2923                     prevOp = nextOp;
2924                     nextOp = &m_ops[nextOp->m_nextOp];
2925                 }
2926
2927                 // We fall through to here if there is insufficient input to run the last alternative.
2928
2929                 // If there is insufficient input to run the last alternative, then for 'once through'
2930                 // alternatives we are done - just jump back up into the forwards matching path at the End.
2931                 if (onceThrough) {
2932                     op.m_jumps.linkTo(endOp.m_reentry, this);
2933                     jump(endOp.m_reentry);
2934                     break;
2935                 }
2936
2937                 // For repeating alternatives, link any input check failure from the last alternative to
2938                 // this point.
2939                 op.m_jumps.link(this);
2940
2941                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
2942
2943                 // Check for cases where input position is already incremented by 1 for the last
2944                 // alternative (this is particularly useful where the minimum size of the body
2945                 // disjunction is 0, e.g. /a*|b/).
2946                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
2947                     // index is already incremented by 1, so just store it now!
2948                     setMatchStart(index);
2949                     needsToUpdateMatchStart = false;
2950                 }
2951
2952                 if (!m_pattern.sticky()) {
2953                     // Check whether there is sufficient input to loop. Increment the input position by
2954                     // one, and check. Also add in the minimum disjunction size before checking - there
2955                     // is no point in looping if we're just going to fail all the input checks around
2956                     // the next iteration.
2957                     ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
2958                     if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
2959                         // If the last alternative had the same minimum size as the disjunction,
2960                         // just simply increment input pos by 1, no adjustment based on minimum size.
2961                         add32(TrustedImm32(1), index);
2962                     } else {
2963                         // If the minumum for the last alternative was one greater than than that
2964                         // for the disjunction, we're already progressed by 1, nothing to do!
2965                         unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
2966                         if (delta)
2967                             sub32(Imm32(delta), index);
2968                     }
2969                     Jump matchFailed = jumpIfNoAvailableInput();
2970
2971                     if (needsToUpdateMatchStart) {
2972                         if (!m_pattern.m_body->m_minimumSize)
2973                             setMatchStart(index);
2974                         else {
2975                             move(index, regT0);
2976                             sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
2977                             setMatchStart(regT0);
2978                         }
2979                     }
2980
2981                     // Calculate how much more input the first alternative requires than the minimum
2982                     // for the body as a whole. If no more is needed then we dont need an additional
2983                     // input check here - jump straight back up to the start of the first alternative.
2984                     if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
2985                         jump(beginOp->m_reentry);
2986                     else {
2987                         if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
2988                             add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
2989                         else
2990                             sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
2991                         checkInput().linkTo(beginOp->m_reentry, this);
2992                         jump(firstInputCheckFailed);
2993                     }
2994
2995                     // We jump to here if we iterate to the point that there is insufficient input to
2996                     // run any matches, and need to return a failure state from JIT code.
2997                     matchFailed.link(this);
2998                 }
2999
3000                 lastStickyAlternativeFailures.link(this);
3001                 removeCallFrame();
3002                 generateFailReturn();
3003                 break;
3004             }
3005             case OpBodyAlternativeEnd: {
3006                 // We should never backtrack back into a body disjunction.
3007                 ASSERT(m_backtrackingState.isEmpty());
3008
3009                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
3010                 m_checkedOffset += priorAlternative->m_minimumSize;
3011                 break;
3012             }
3013
3014             // OpSimpleNestedAlternativeBegin/Next/End
3015             // OpNestedAlternativeBegin/Next/End
3016             //
3017             // Generate code for when we backtrack back out of an alternative into
3018             // a Begin or Next node, or when the entry input count check fails. If
3019             // there are more alternatives we need to jump to the next alternative,
3020             // if not we backtrack back out of the current set of parentheses.
3021             //
3022             // In the case of non-simple nested assertions we need to also link the
3023             // 'return address' appropriately to backtrack back out into the correct
3024             // alternative.
3025             case OpSimpleNestedAlternativeBegin:
3026             case OpSimpleNestedAlternativeNext:
3027             case OpNestedAlternativeBegin:
3028             case OpNestedAlternativeNext: {
3029                 YarrOp& nextOp = m_ops[op.m_nextOp];
3030                 bool isBegin = op.m_previousOp == notFound;
3031                 bool isLastAlternative = nextOp.m_nextOp == notFound;
3032                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
3033                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
3034
3035                 // Treat an input check failure the same as a failed match.
3036                 m_backtrackingState.append(op.m_jumps);
3037
3038                 // Set the backtracks to jump to the appropriate place. We may need
3039                 // to link the backtracks in one of three different way depending on
3040                 // the type of alternative we are dealing with:
3041                 //  - A single alternative, with no simplings.
3042                 //  - The last alternative of a set of two or more.
3043                 //  - An alternative other than the last of a set of two or more.
3044                 //
3045                 // In the case of a single alternative on its own, we don't need to
3046                 // jump anywhere - if the alternative fails to match we can just
3047                 // continue to backtrack out of the parentheses without jumping.
3048                 //
3049                 // In the case of the last alternative in a set of more than one, we
3050                 // need to jump to return back out to the beginning. We'll do so by
3051                 // adding a jump to the End node's m_jumps list, and linking this
3052                 // when we come to generate the Begin node. For alternatives other
3053                 // than the last, we need to jump to the next alternative.
3054                 //
3055                 // If the alternative had adjusted the input position we must link
3056                 // backtracking to here, correct, and then jump on. If not we can
3057                 // link the backtracks directly to their destination.
3058                 if (op.m_checkAdjust) {
3059                     // Handle the cases where we need to link the backtracks here.
3060                     m_backtrackingState.link(this);
3061                     sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3062                     if (!isLastAlternative) {
3063                         // An alternative that is not the last should jump to its successor.
3064                         jump(nextOp.m_reentry);
3065                     } else if (!isBegin) {
3066                         // The last of more than one alternatives must jump back to the beginning.
3067                         nextOp.m_jumps.append(jump());
3068                     } else {
3069                         // A single alternative on its own can fall through.
3070                         m_backtrackingState.fallthrough();
3071                     }
3072                 } else {
3073                     // Handle the cases where we can link the backtracks directly to their destinations.
3074                     if (!isLastAlternative) {
3075                         // An alternative that is not the last should jump to its successor.
3076                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
3077                     } else if (!isBegin) {
3078                         // The last of more than one alternatives must jump back to the beginning.
3079                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
3080                     }
3081                     // In the case of a single alternative on its own do nothing - it can fall through.
3082                 }
3083
3084                 // If there is a backtrack jump from a zero length match link it here.
3085                 if (op.m_zeroLengthMatch.isSet())
3086                     m_backtrackingState.append(op.m_zeroLengthMatch);
3087
3088                 // At this point we've handled the backtracking back into this node.
3089                 // Now link any backtracks that need to jump to here.
3090