Renamed JSGlobalData to VM
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrJIT.cpp
1 /*
2  * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
#include "config.h"
#include "YarrJIT.h"

#include "LinkBuffer.h"
#include "Options.h"
#include "Yarr.h"
#include "YarrCanonicalizeUCS2.h"
#include <utility>
#include <wtf/ASCIICType.h>
34
35 #if ENABLE(YARR_JIT)
36
37 using namespace WTF;
38
39 namespace JSC { namespace Yarr {
40
41 template<YarrJITCompileMode compileMode>
42 class YarrGenerator : private MacroAssembler {
43     friend void jitCompile(VM*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
44
#if CPU(ARM)
    // Incoming arguments: subject string pointer, current character index,
    // string length, and the output vector pointer (ovector / match start).
    static const RegisterID input = ARMRegisters::r0;
    static const RegisterID index = ARMRegisters::r1;
    static const RegisterID length = ARMRegisters::r2;
    static const RegisterID output = ARMRegisters::r4;

    // General-purpose temporaries for character loads and scratch values.
    static const RegisterID regT0 = ARMRegisters::r5;
    static const RegisterID regT1 = ARMRegisters::r6;

    // Registers used for the (two-word) return value.
    static const RegisterID returnRegister = ARMRegisters::r0;
    static const RegisterID returnRegister2 = ARMRegisters::r1;
#elif CPU(MIPS)
    static const RegisterID input = MIPSRegisters::a0;
    static const RegisterID index = MIPSRegisters::a1;
    static const RegisterID length = MIPSRegisters::a2;
    static const RegisterID output = MIPSRegisters::a3;

    static const RegisterID regT0 = MIPSRegisters::t4;
    static const RegisterID regT1 = MIPSRegisters::t5;

    static const RegisterID returnRegister = MIPSRegisters::v0;
    static const RegisterID returnRegister2 = MIPSRegisters::v1;
#elif CPU(SH4)
    static const RegisterID input = SH4Registers::r4;
    static const RegisterID index = SH4Registers::r5;
    static const RegisterID length = SH4Registers::r6;
    static const RegisterID output = SH4Registers::r7;

    static const RegisterID regT0 = SH4Registers::r0;
    static const RegisterID regT1 = SH4Registers::r1;

    static const RegisterID returnRegister = SH4Registers::r0;
    static const RegisterID returnRegister2 = SH4Registers::r1;
#elif CPU(X86)
    static const RegisterID input = X86Registers::eax;
    static const RegisterID index = X86Registers::edx;
    static const RegisterID length = X86Registers::ecx;
    static const RegisterID output = X86Registers::edi;

    static const RegisterID regT0 = X86Registers::ebx;
    static const RegisterID regT1 = X86Registers::esi;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;
#elif CPU(X86_64)
#if !OS(WINDOWS)
    // System V AMD64 ABI: the first four integer arguments arrive in
    // rdi/rsi/rdx/rcx.
    static const RegisterID input = X86Registers::edi;
    static const RegisterID index = X86Registers::esi;
    static const RegisterID length = X86Registers::edx;
    static const RegisterID output = X86Registers::ecx;
#else
    // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
    // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
    COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
    static const RegisterID input = X86Registers::edx;
    static const RegisterID index = X86Registers::r8;
    static const RegisterID length = X86Registers::r9;
    static const RegisterID output = X86Registers::r10;
#endif

    static const RegisterID regT0 = X86Registers::eax;
    static const RegisterID regT1 = X86Registers::ebx;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;
#endif
111
112     void optimizeAlternative(PatternAlternative* alternative)
113     {
114         if (!alternative->m_terms.size())
115             return;
116
117         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
118             PatternTerm& term = alternative->m_terms[i];
119             PatternTerm& nextTerm = alternative->m_terms[i + 1];
120
121             if ((term.type == PatternTerm::TypeCharacterClass)
122                 && (term.quantityType == QuantifierFixedCount)
123                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
124                 && (nextTerm.quantityType == QuantifierFixedCount)) {
125                 PatternTerm termCopy = term;
126                 alternative->m_terms[i] = nextTerm;
127                 alternative->m_terms[i + 1] = termCopy;
128             }
129         }
130     }
131
    // Emits a binary search over the sorted 'ranges', interleaving the sorted
    // singleton 'matches' that fall between them. Appends jumps to 'matchDest'
    // taken when 'character' lies in a range or equals a singleton, and jumps
    // to 'failures' otherwise. '*matchIndex' tracks how many singletons have
    // been consumed; the caller emits any singletons left over afterwards.
    // NOTE(review): lo/hi are held in plain 'char' - this assumes the
    // non-unicode m_ranges only contain ASCII (<= 0x7f) endpoints; verify
    // against YarrPattern's range construction.
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
    {
        do {
            // pick which range we're going to generate
            int which = count >> 1;
            char lo = ranges[which].begin;
            char hi = ranges[which].end;

            // check if there are any ranges or matches below lo.  If not, just jl to failure -
            // if there is anything else to check, check that first, if it falls through jmp to failure.
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                // generate code for all ranges before this one
                if (which)
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);

                // Emit the singleton matches that sit below this range.
                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                    ++*matchIndex;
                }
                failures.append(jump());

                loOrAbove.link(this);
            } else if (which) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
                failures.append(jump());

                loOrAbove.link(this);
            } else
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

            // Singletons inside [lo, hi] are covered by the range test below.
            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
                ++*matchIndex;

            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
            // fall through to here, the value is above hi.

            // shuffle along & loop around if there are any more matches to handle.
            unsigned next = which + 1;
            ranges += next;
            count -= next;
        } while (count);
    }
178
    // Appends jumps to 'matchDest' taken when 'character' is a member of
    // 'charClass'; execution falls through when it is not. Prefers a single
    // table lookup when the class carries a precomputed bitmap table.
    // NOTE: the case-folding optimization below ORs 0x20 into the 'character'
    // register, so the register may be clobbered on the fall-through path.
    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    {
        // Fast path: one branchTest8 against the 256-entry membership table.
        if (charClass->m_table) {
            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
            matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
            return;
        }
        Jump unicodeFail;
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
            // Handle non-ASCII (> 0x7f) singletons and ranges first.
            Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));

            if (charClass->m_matchesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
                    UChar ch = charClass->m_matchesUnicode[i];
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
                }
            }

            if (charClass->m_rangesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
                    UChar lo = charClass->m_rangesUnicode[i].begin;
                    UChar hi = charClass->m_rangesUnicode[i].end;

                    Jump below = branch32(LessThan, character, Imm32(lo));
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                    below.link(this);
                }
            }

            // A non-ASCII character matching nothing above fails the class.
            unicodeFail = jump();
            isAscii.link(this);
        }

        if (charClass->m_ranges.size()) {
            // Binary-search the ASCII ranges, interleaving the singletons.
            unsigned matchIndex = 0;
            JumpList failures;
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
            while (matchIndex < charClass->m_matches.size())
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));

            failures.link(this);
        } else if (charClass->m_matches.size()) {
            // optimization: gather 'a','A' etc back together, can mask & test once.
            Vector<char> matchesAZaz;

            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
                char ch = charClass->m_matches[i];
                if (m_pattern.m_ignoreCase) {
                    // Keep only the lowercase of each folded pair; uppercase
                    // counterparts are covered by the OR-0x20 compare below.
                    if (isASCIILower(ch)) {
                        matchesAZaz.append(ch);
                        continue;
                    }
                    if (isASCIIUpper(ch))
                        continue;
                }
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
            }

            if (unsigned countAZaz = matchesAZaz.size()) {
                // Force lowercase (clobbers 'character'), then compare against
                // the gathered lowercase letters.
                or32(TrustedImm32(32), character);
                for (unsigned i = 0; i < countAZaz; ++i)
                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
            }
        }

        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
            unicodeFail.link(this);
    }
247
248     // Jumps if input not available; will have (incorrectly) incremented already!
249     Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
250     {
251         if (countToCheck)
252             add32(Imm32(countToCheck), index);
253         return branch32(Above, index, length);
254     }
255
256     Jump jumpIfAvailableInput(unsigned countToCheck)
257     {
258         add32(Imm32(countToCheck), index);
259         return branch32(BelowOrEqual, index, length);
260     }
261
262     Jump checkInput()
263     {
264         return branch32(BelowOrEqual, index, length);
265     }
266
267     Jump atEndOfInput()
268     {
269         return branch32(Equal, index, length);
270     }
271
272     Jump notAtEndOfInput()
273     {
274         return branch32(NotEqual, index, length);
275     }
276
277     Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
278     {
279         readCharacter(inputPosition, character);
280
281         // For case-insesitive compares, non-ascii characters that have different
282         // upper & lower case representations are converted to a character class.
283         ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
284         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
285             or32(TrustedImm32(0x20), character);
286             ch |= 0x20;
287         }
288
289         return branch32(NotEqual, character, Imm32(ch));
290     }
291
292     void readCharacter(int inputPosition, RegisterID reg)
293     {
294         if (m_charSize == Char8)
295             load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
296         else
297             load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
298     }
299
    // Spills 'reg' into backtracking-frame slot 'frameLocation' on the stack.
    void storeToFrame(RegisterID reg, unsigned frameLocation)
    {
        poke(reg, frameLocation);
    }
304
    // Stores an immediate into backtracking-frame slot 'frameLocation'.
    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }
309
    // Stores a to-be-patched pointer (used as a backtrack return address) into
    // the given frame slot; the returned label is patched later via LinkBuffer.
    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
    {
        return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }
314
    // Reloads backtracking-frame slot 'frameLocation' into 'reg'.
    void loadFromFrame(unsigned frameLocation, RegisterID reg)
    {
        peek(reg, frameLocation);
    }
319
    // Indirect-jumps through the (return address) pointer stored in the given
    // backtracking-frame slot.
    void loadFromFrameAndJump(unsigned frameLocation)
    {
        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }
324
325     void initCallFrame()
326     {
327         unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
328         if (callFrameSize)
329             subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
330     }
331     void removeCallFrame()
332     {
333         unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
334         if (callFrameSize)
335             addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
336     }
337
    // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
    // Writes the start index of the given subpattern into the first slot of its
    // (start, end) pair in the output vector.
    void setSubpatternStart(RegisterID reg, unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
    }
    // Writes the end index of the given subpattern into the second slot of its
    // (start, end) pair in the output vector.
    void setSubpatternEnd(RegisterID reg, unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
    }
    // Marks the subpattern as unmatched by storing -1 as its start index.
    void clearSubpatternStart(unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
    }
357
358     // We use one of three different strategies to track the start of the current match,
359     // while matching.
360     // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
361     //    at the end of matching. This is irrespective of compileMode, and in this case
362     //    these methods should never be called.
363     // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
364     //    vector, store the match start in the output vector.
365     // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
366     //    in this register.
367     void setMatchStart(RegisterID reg)
368     {
369         ASSERT(!m_pattern.m_body->m_hasFixedSize);
370         if (compileMode == IncludeSubpatterns)
371             store32(reg, output);
372         else
373             move(reg, output);
374     }
375     void getMatchStart(RegisterID reg)
376     {
377         ASSERT(!m_pattern.m_body->m_hasFixedSize);
378         if (compileMode == IncludeSubpatterns)
379             load32(output, reg);
380         else
381             move(output, reg);
382     }
383
    // The set of compiled operations emitted for a pattern; the pattern tree is
    // flattened into a Vector of these before code generation.
    enum YarrOpCode {
        // These nodes wrap body alternatives - those in the main disjunction,
        // rather than subpatterns or assertions. These are chained together in
        // a doubly linked list, with a 'begin' node for the first alternative,
        // a 'next' node for each subsequent alternative, and an 'end' node at
        // the end. In the case of repeating alternatives, the 'end' node also
        // has a reference back to 'begin'.
        OpBodyAlternativeBegin,
        OpBodyAlternativeNext,
        OpBodyAlternativeEnd,
        // Similar to the body alternatives, but used for subpatterns with two
        // or more alternatives.
        OpNestedAlternativeBegin,
        OpNestedAlternativeNext,
        OpNestedAlternativeEnd,
        // Used for alternatives in subpatterns where there is only a single
        // alternative (backtracking is easier in these cases), or for alternatives
        // which never need to be backtracked (those in parenthetical assertions,
        // terminal subpatterns).
        OpSimpleNestedAlternativeBegin,
        OpSimpleNestedAlternativeNext,
        OpSimpleNestedAlternativeEnd,
        // Used to wrap 'Once' subpattern matches (quantityCount == 1).
        OpParenthesesSubpatternOnceBegin,
        OpParenthesesSubpatternOnceEnd,
        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
        OpParenthesesSubpatternTerminalBegin,
        OpParenthesesSubpatternTerminalEnd,
        // Used to wrap parenthetical assertions.
        OpParentheticalAssertionBegin,
        OpParentheticalAssertionEnd,
        // Wraps all simple terms (pattern characters, character classes).
        OpTerm,
        // Where an expression contains only 'once through' body alternatives
        // and no repeating ones, this op is used to return match failure.
        OpMatchFailed
    };
421
    // This structure is used to hold the compiled opcode information,
    // including reference back to the original PatternTerm/PatternAlternatives,
    // and JIT compilation data structures.
    struct YarrOp {
        // Wraps a simple term; the opcode is always OpTerm.
        explicit YarrOp(PatternTerm* term)
            : m_op(OpTerm)
            , m_term(term)
            , m_isDeadCode(false)
        {
        }

        // Wraps a structural opcode. Note that m_term is deliberately left
        // unset here - presumably it is only consulted for OpTerm nodes;
        // verify at the use sites.
        explicit YarrOp(YarrOpCode op)
            : m_op(op)
            , m_isDeadCode(false)
        {
        }

        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
        YarrOpCode m_op;
        PatternTerm* m_term;

        // For alternatives, this holds the PatternAlternative and doubly linked
        // references to this alternative's siblings. In the case of the
        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
        // repeating alternative.
        PatternAlternative* m_alternative;
        size_t m_previousOp;
        size_t m_nextOp;

        // Used to record a set of Jumps out of the generated code, typically
        // used for jumps out to backtracking code, and a single reentry back
        // into the code for a node (likely where a backtrack will trigger
        // rematching).
        Label m_reentry;
        JumpList m_jumps;

        // Used for backtracking when the prior alternative did not consume any
        // characters but matched.
        Jump m_zeroLengthMatch;

        // This flag is used to null out the second pattern character, when
        // two are fused to match a pair together.
        bool m_isDeadCode;

        // Currently used in the case of some of the more complex management of
        // 'm_checked', to cache the offset used in this alternative, to avoid
        // recalculating it.
        int m_checkAdjust;

        // Used by OpNestedAlternativeNext/End to hold the pointer to the
        // value that will be pushed into the pattern's frame to return to,
        // upon backtracking back into the disjunction.
        DataLabelPtr m_returnAddress;
    };
477
    // BacktrackingState
    // This class encapsulates information about the state of code generation
    // whilst generating the code for backtracking, when a term fails to match.
    // Upon entry to code generation of the backtracking code for a given node,
    // the Backtracking state will hold references to all control flow sources
    // that are outputs in need of further backtracking from the prior node
    // generated (which is the subsequent operation in the regular expression,
    // and in the m_ops Vector, since we generated backtracking backwards).
    // These references to control flow take the form of:
    //  - A jump list of jumps, to be linked to code that will backtrack them
    //    further.
    //  - A set of DataLabelPtr values, to be populated with values to be
    //    treated effectively as return addresses backtracking into complex
    //    subpatterns.
    //  - A flag indicating that the current sequence of generated code up to
    //    this point requires backtracking.
    class BacktrackingState {
    public:
        BacktrackingState()
            : m_pendingFallthrough(false)
        {
        }

        // Add a jump or jumps, a return address, or set the flag indicating
        // that the current 'fallthrough' control flow requires backtracking.
        void append(const Jump& jump)
        {
            m_laterFailures.append(jump);
        }
        void append(JumpList& jumpList)
        {
            m_laterFailures.append(jumpList);
        }
        void append(const DataLabelPtr& returnAddress)
        {
            m_pendingReturns.append(returnAddress);
        }
        void fallthrough()
        {
            ASSERT(!m_pendingFallthrough);
            m_pendingFallthrough = true;
        }

        // These methods clear the backtracking state, either linking to the
        // current location, a provided label, or copying the backtracking out
        // to a JumpList. All actions may require code generation to take place,
        // and as such are passed a pointer to the assembler.
        void link(MacroAssembler* assembler)
        {
            // Bind all pending return addresses to the current location.
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
            }
            m_laterFailures.link(assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void linkTo(Label label, MacroAssembler* assembler)
        {
            // Bind pending return addresses to 'label' rather than 'here'.
            if (m_pendingReturns.size()) {
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
                m_pendingReturns.clear();
            }
            // Fallthrough control flow needs an explicit jump to reach 'label'.
            if (m_pendingFallthrough)
                assembler->jump(label);
            m_laterFailures.linkTo(label, assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
                // The return addresses were just bound here, so force a jump
                // to be emitted below to carry them into the jump list.
                m_pendingFallthrough = true;
            }
            if (m_pendingFallthrough)
                jumpList.append(assembler->jump());
            jumpList.append(m_laterFailures);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }

        // True when there is no outstanding control flow to backtrack.
        bool isEmpty()
        {
            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
        }

        // Called at the end of code generation to link all return addresses.
        void linkDataLabels(LinkBuffer& linkBuffer)
        {
            ASSERT(isEmpty());
            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
        }

    private:
        // Pairs a patchable pointer with the label it should resolve to.
        struct ReturnAddressRecord {
            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
                : m_dataLabel(dataLabel)
                , m_backtrackLocation(backtrackLocation)
            {
            }

            DataLabelPtr m_dataLabel;
            Label m_backtrackLocation;
        };

        JumpList m_laterFailures;
        bool m_pendingFallthrough;
        Vector<DataLabelPtr, 4> m_pendingReturns;
        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
    };
596
597     // Generation methods:
598     // ===================
599
600     // This method provides a default implementation of backtracking common
601     // to many terms; terms commonly jump out of the forwards  matching path
602     // on any failed conditions, and add these jumps to the m_jumps list. If
603     // no special handling is required we can often just backtrack to m_jumps.
604     void backtrackTermDefault(size_t opIndex)
605     {
606         YarrOp& op = m_ops[opIndex];
607         m_backtrackingState.append(op.m_jumps);
608     }
609
    // Asserts that the current position is at the start of a line: the absolute
    // start of input, or (for multiline patterns) just after a newline.
    void generateAssertionBOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            // At term position 0, index == m_checked means the start of input.
            if (!term->inputPosition)
                matchDest.append(branch32(Equal, index, Imm32(m_checked)));

            // Otherwise the preceding character must be a newline.
            readCharacter((term->inputPosition - m_checked) - 1, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            // Erk, really should poison out these alternatives early. :-/
            if (term->inputPosition)
                op.m_jumps.append(jump());
            else
                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
        }
    }
    // BOL emits only failure jumps, so default backtracking handles them.
    void backtrackAssertionBOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
639
    // Asserts that the current position is at the end of a line: the absolute
    // end of input, or (for multiline patterns) immediately before a newline.
    void generateAssertionEOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (term->inputPosition == m_checked)
                matchDest.append(atEndOfInput());

            // Otherwise the character at this position must be a newline.
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            if (term->inputPosition == m_checked)
                op.m_jumps.append(notAtEndOfInput());
            // Erk, really should poison out these alternatives early. :-/
            else
                op.m_jumps.append(jump());
        }
    }
    // EOL emits only failure jumps, so default backtracking handles them.
    void backtrackAssertionEOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
669
    // Tests the character at the current position against the wordchar class,
    // appending a jump to nextIsWordChar on a match, and to nextIsNotWordChar
    // at end-of-input. Also falls through on nextIsNotWordChar.
    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        if (term->inputPosition == m_checked)
            nextIsNotWordChar.append(atEndOfInput());

        readCharacter((term->inputPosition - m_checked), character);
        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
    }
684
    // Asserts a word boundary (\b), or a non-boundary (\B, when the term is
    // inverted): the wordchar-ness of the previous and next characters must
    // differ (respectively, agree).
    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        // First classify the character before the current position; the start
        // of input counts as a non-wordchar.
        Jump atBegin;
        JumpList matchDest;
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checked));
        readCharacter((term->inputPosition - m_checked) - 1, character);
        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        // Same wordchar-ness on both sides fails \b (or \B when inverted).
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-through!
        }

        op.m_jumps.append(wordCharThenWordChar);

        // Both success paths (differing wordchar-ness) converge here.
        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
    // Backtracking for a word-boundary assertion: the assertion consumes no
    // input, so the default term backtracking suffices.
    void backtrackAssertionWordBoundary(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
734
    // Generates code to match a single fixed-count pattern character.
    // As an optimization, where the immediately following ops are also
    // single fixed-count pattern characters at consecutive input positions,
    // they are coalesced into this op so that several characters can be
    // compared with one wider load (up to 4 characters for 8-bit subject
    // strings, 2 for 16-bit); the merged ops are marked as dead code.
    void generatePatternCharacterOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];

        // This op may already have been handled by a preceding coalesced
        // compare - if so there is nothing to emit.
        if (op.m_isDeadCode)
            return;
        
        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
        // node, so there must always be at least one more node.
        ASSERT(opIndex + 1 < m_ops.size());
        YarrOp* nextOp = &m_ops[opIndex + 1];

        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        if ((ch > 0xff) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            op.m_jumps.append(jump());
            return;
        }

        const RegisterID character = regT0;
        int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
        unsigned ignoreCaseMask = 0;
        // Accumulate the coalesced characters into a single word; on
        // big-endian targets the first character occupies the high bits.
#if CPU(BIG_ENDIAN)
        int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
        int allCharacters = ch;
#endif
        int numberCharacters;
        int startTermPosition = term->inputPosition;

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));

        // ASCII letters differ between cases only in bit 0x20, so case can be
        // ignored by OR-ing that bit into both sides of the comparison.
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
#if CPU(BIG_ENDIAN)
            ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
            ignoreCaseMask |= 32;
#endif

        // Fold subsequent single fixed-count pattern character terms at
        // consecutive input positions into this comparison.
        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
            PatternTerm* nextTerm = nextOp->m_term;
            
            if (nextTerm->type != PatternTerm::TypePatternCharacter
                || nextTerm->quantityType != QuantifierFixedCount
                || nextTerm->quantityCount != 1
                || nextTerm->inputPosition != (startTermPosition + numberCharacters))
                break;

            // This term will be matched here - don't generate code for it again.
            nextOp->m_isDeadCode = true;

#if CPU(BIG_ENDIAN)
            int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
#else
            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
#endif

            UChar currentCharacter = nextTerm->patternCharacter;

            if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
                // Have a 16 bit pattern character and an 8 bit string - short circuit
                op.m_jumps.append(jump());
                return;
            }

            // For case-insensitive compares, non-ascii characters that have different
            // upper & lower case representations are converted to a character class.
            ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter));

            allCharacters |= (currentCharacter << shiftAmount);

            if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
                ignoreCaseMask |= 32 << shiftAmount;                    
        }

        // Emit the appropriately-sized load and compare; on mismatch, jump
        // to this op's failure list.
        if (m_charSize == Char8) {
            switch (numberCharacters) {
            case 1:
                op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
                return;
            case 2: {
                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load16Unaligned(address, character);
                break;
            }
            case 3: {
                // Three characters: compare two with a 16-bit load, then the
                // remaining one with a separate single-character compare.
                BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load16Unaligned(highAddress, character);
                if (ignoreCaseMask)
                    or32(Imm32(ignoreCaseMask), character);
                op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
                op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character));
                return;
            }
            case 4: {
                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load32WithUnalignedHalfWords(address, character);
                break;
            }
            }
        } else {
            switch (numberCharacters) {
            case 1:
                op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
                return;
            case 2:
                BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
                load32WithUnalignedHalfWords(address, character);
                break;
            }
        }

        if (ignoreCaseMask)
            or32(Imm32(ignoreCaseMask), character);
        op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
        return;
    }
    // Backtracking for a single fixed-count pattern character: nothing was
    // consumed speculatively, so the default term backtracking suffices.
    void backtrackPatternCharacterOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
859
    // Generates code to match a fixed-count repetition (quantityCount > 1)
    // of a single pattern character, one comparison per loop iteration.
    // countRegister runs from (index - quantityCount) up to index, so the
    // address computation below biases the offset by +quantityCount.
    void generatePatternCharacterFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());

        if (m_charSize == Char8)
            load8(address, character);
        else
            load16(address, character);

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            // ASCII letters differ between cases only in the 0x20 bit.
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        // Mismatch anywhere in the run fails the whole term.
        op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // Backtracking for a fixed-count pattern character: a fixed count offers
    // no alternatives to retry, so the default term backtracking suffices.
    void backtrackPatternCharacterFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
896
    // Generates code for a greedily-quantified pattern character: consume as
    // many occurrences as possible (bounded by quantityCount, or unbounded
    // for quantifyInfinite), then store the match count to the frame so that
    // backtracking can give characters back one at a time.
    void generatePatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        // (the character can never match, so zero occurrences are consumed).
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList failures;
            Label loop(this);
            failures.append(atEndOfInput());
            failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));

            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);
            if (term->quantityCount == quantifyInfinite)
                jump(loop);
            else
                branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);

            failures.link(this);
        }
        // Backtracking re-enters here after decrementing the count.
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtracks a greedy pattern character by giving back one matched
    // occurrence: if the stored count is already zero there is nothing to
    // give back and backtracking continues onward; otherwise decrement the
    // count and input position and re-enter the term to store the new count.
    void backtrackPatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }
943
    // Generates code for a non-greedily quantified pattern character: match
    // zero occurrences up front (storing a zero count to the frame); further
    // occurrences are only consumed during backtracking.
    void generatePatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtracks a non-greedy pattern character by trying to consume one
    // more occurrence. Fails onward if at the end of input, if the quantity
    // limit has been reached, or if the next character does not match; on
    // failure the input position is rewound by the count consumed so far.
    void backtrackPatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        // (the character can never match, so fail immediately).
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList nonGreedyFailures;
            nonGreedyFailures.append(atEndOfInput());
            if (term->quantityCount != quantifyInfinite)
                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
            nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));

            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);

            jump(op.m_reentry);
            nonGreedyFailures.link(this);
        }

        // Give back everything this term consumed before failing onward.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
986
    // Generates code to match a single fixed-count character class term.
    // Reads the character at the term's position and jumps to the op's
    // failure list on a mismatch; the sense of the class test is reversed
    // for inverted classes.
    void generateCharacterClassOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        JumpList matchDest;
        readCharacter(term->inputPosition - m_checked, character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            op.m_jumps.append(matchDest);
        else {
            op.m_jumps.append(jump());
            matchDest.link(this);
        }
    }
    // Backtracking for a single fixed-count character class: nothing was
    // consumed speculatively, so the default term backtracking suffices.
    void backtrackCharacterClassOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1009
    // Generates code to match a fixed-count repetition (quantityCount > 1)
    // of a character class, one class test per loop iteration.
    // countRegister runs from (index - quantityCount) up to index, so the
    // address computation below biases the offset by +quantityCount.
    void generateCharacterClassFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        JumpList matchDest;
        if (m_charSize == Char8)
            load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
        else
            load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
        matchCharacterClass(character, matchDest, term->characterClass);

        // Mismatch anywhere in the run fails the whole term; the class test
        // is reversed for inverted classes.
        if (term->invert())
            op.m_jumps.append(matchDest);
        else {
            op.m_jumps.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // Backtracking for a fixed-count character class: a fixed count offers
    // no alternatives to retry, so the default term backtracking suffices.
    void backtrackCharacterClassFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1043
    // Generates code for a greedily-quantified character class: consume as
    // many matching characters as possible (bounded by quantityCount, or
    // unbounded for quantifyInfinite), then store the match count to the
    // frame so that backtracking can give characters back one at a time.
    void generateCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        JumpList failures;
        Label loop(this);
        failures.append(atEndOfInput());

        // For an inverted class a successful class match is the failure case.
        if (term->invert()) {
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, failures, term->characterClass);
        } else {
            JumpList matchDest;
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, term->characterClass);
            failures.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);
        if (term->quantityCount != quantifyInfinite) {
            branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
            failures.append(jump());
        } else
            jump(loop);

        failures.link(this);
        // Backtracking re-enters here after decrementing the count.
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtracks a greedy character class by giving back one matched
    // character: if the stored count is already zero there is nothing to
    // give back and backtracking continues onward; otherwise decrement the
    // count and input position and re-enter the term to store the new count.
    void backtrackCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }
1097
    // Generates code for a non-greedily quantified character class: match
    // zero characters up front (storing a zero count to the frame); further
    // characters are only consumed during backtracking.
    void generateCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtracks a non-greedy character class by trying to consume one more
    // character. Fails onward if at the end of input, if the quantity limit
    // has been reached, or if the next character is not in the class (class
    // test reversed for inverted classes); on failure the input position is
    // rewound by the count consumed so far.
    void backtrackCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        JumpList nonGreedyFailures;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);

        nonGreedyFailures.append(atEndOfInput());
        nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));

        JumpList matchDest;
        readCharacter(term->inputPosition - m_checked, character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            nonGreedyFailures.append(matchDest);
        else {
            nonGreedyFailures.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);

        jump(op.m_reentry);

        nonGreedyFailures.link(this);
        // Give back everything this term consumed before failing onward.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
1146
    // Generates code for a dot-star enclosure term (a body enclosed by
    // leading/trailing .*). Rather than matching the enclosing .* runs
    // directly, this scans backwards from the already-determined match start
    // to the preceding newline (or the start of input) to fix the real match
    // start, and forwards from the current position to the next newline (or
    // the end of input) to fix the match end.
    void generateDotStarEnclosure(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID matchPos = regT1;

        JumpList foundBeginningNewLine;
        JumpList saveStartIndex;
        JumpList foundEndingNewLine;

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        getMatchStart(matchPos);

        // Scan backwards for a newline before the match start (stopping at
        // position zero).
        saveStartIndex.append(branchTest32(Zero, matchPos));
        Label findBOLLoop(this);
        sub32(TrustedImm32(1), matchPos);
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
        branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
        saveStartIndex.append(jump());

        foundBeginningNewLine.link(this);
        add32(TrustedImm32(1), matchPos); // Advance past newline
        saveStartIndex.link(this);

        // A non-multiline pattern anchored at the start may only match from
        // the very beginning of the input.
        if (!m_pattern.m_multiline && term->anchors.bolAnchor)
            op.m_jumps.append(branchTest32(NonZero, matchPos));

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        setMatchStart(matchPos);

        move(index, matchPos);

        // Scan forwards to the next newline (or the end of input) to find
        // the end of the match.
        Label findEOLLoop(this);        
        foundEndingNewLine.append(branch32(Equal, matchPos, length));
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
        add32(TrustedImm32(1), matchPos);
        jump(findEOLLoop);

        foundEndingNewLine.link(this);

        // A non-multiline pattern anchored at the end may only match up to
        // the very end of the input.
        if (!m_pattern.m_multiline && term->anchors.eolAnchor)
            op.m_jumps.append(branch32(NotEqual, matchPos, length));

        move(matchPos, index);
    }
1202
    // Backtracking for a dot-star enclosure: no alternatives to retry, so
    // the default term backtracking suffices.
    void backtrackDotStarEnclosure(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1207     
1208     // Code generation/backtracking for simple terms
1209     // (pattern characters, character classes, and assertions).
1210     // These methods farm out work to the set of functions above.
1211     void generateTerm(size_t opIndex)
1212     {
1213         YarrOp& op = m_ops[opIndex];
1214         PatternTerm* term = op.m_term;
1215
1216         switch (term->type) {
1217         case PatternTerm::TypePatternCharacter:
1218             switch (term->quantityType) {
1219             case QuantifierFixedCount:
1220                 if (term->quantityCount == 1)
1221                     generatePatternCharacterOnce(opIndex);
1222                 else
1223                     generatePatternCharacterFixed(opIndex);
1224                 break;
1225             case QuantifierGreedy:
1226                 generatePatternCharacterGreedy(opIndex);
1227                 break;
1228             case QuantifierNonGreedy:
1229                 generatePatternCharacterNonGreedy(opIndex);
1230                 break;
1231             }
1232             break;
1233
1234         case PatternTerm::TypeCharacterClass:
1235             switch (term->quantityType) {
1236             case QuantifierFixedCount:
1237                 if (term->quantityCount == 1)
1238                     generateCharacterClassOnce(opIndex);
1239                 else
1240                     generateCharacterClassFixed(opIndex);
1241                 break;
1242             case QuantifierGreedy:
1243                 generateCharacterClassGreedy(opIndex);
1244                 break;
1245             case QuantifierNonGreedy:
1246                 generateCharacterClassNonGreedy(opIndex);
1247                 break;
1248             }
1249             break;
1250
1251         case PatternTerm::TypeAssertionBOL:
1252             generateAssertionBOL(opIndex);
1253             break;
1254
1255         case PatternTerm::TypeAssertionEOL:
1256             generateAssertionEOL(opIndex);
1257             break;
1258
1259         case PatternTerm::TypeAssertionWordBoundary:
1260             generateAssertionWordBoundary(opIndex);
1261             break;
1262
1263         case PatternTerm::TypeForwardReference:
1264             break;
1265
1266         case PatternTerm::TypeParenthesesSubpattern:
1267         case PatternTerm::TypeParentheticalAssertion:
1268             RELEASE_ASSERT_NOT_REACHED();
1269         case PatternTerm::TypeBackReference:
1270             m_shouldFallBack = true;
1271             break;
1272         case PatternTerm::TypeDotStarEnclosure:
1273             generateDotStarEnclosure(opIndex);
1274             break;
1275         }
1276     }
1277     void backtrackTerm(size_t opIndex)
1278     {
1279         YarrOp& op = m_ops[opIndex];
1280         PatternTerm* term = op.m_term;
1281
1282         switch (term->type) {
1283         case PatternTerm::TypePatternCharacter:
1284             switch (term->quantityType) {
1285             case QuantifierFixedCount:
1286                 if (term->quantityCount == 1)
1287                     backtrackPatternCharacterOnce(opIndex);
1288                 else
1289                     backtrackPatternCharacterFixed(opIndex);
1290                 break;
1291             case QuantifierGreedy:
1292                 backtrackPatternCharacterGreedy(opIndex);
1293                 break;
1294             case QuantifierNonGreedy:
1295                 backtrackPatternCharacterNonGreedy(opIndex);
1296                 break;
1297             }
1298             break;
1299
1300         case PatternTerm::TypeCharacterClass:
1301             switch (term->quantityType) {
1302             case QuantifierFixedCount:
1303                 if (term->quantityCount == 1)
1304                     backtrackCharacterClassOnce(opIndex);
1305                 else
1306                     backtrackCharacterClassFixed(opIndex);
1307                 break;
1308             case QuantifierGreedy:
1309                 backtrackCharacterClassGreedy(opIndex);
1310                 break;
1311             case QuantifierNonGreedy:
1312                 backtrackCharacterClassNonGreedy(opIndex);
1313                 break;
1314             }
1315             break;
1316
1317         case PatternTerm::TypeAssertionBOL:
1318             backtrackAssertionBOL(opIndex);
1319             break;
1320
1321         case PatternTerm::TypeAssertionEOL:
1322             backtrackAssertionEOL(opIndex);
1323             break;
1324
1325         case PatternTerm::TypeAssertionWordBoundary:
1326             backtrackAssertionWordBoundary(opIndex);
1327             break;
1328
1329         case PatternTerm::TypeForwardReference:
1330             break;
1331
1332         case PatternTerm::TypeParenthesesSubpattern:
1333         case PatternTerm::TypeParentheticalAssertion:
1334             RELEASE_ASSERT_NOT_REACHED();
1335
1336         case PatternTerm::TypeDotStarEnclosure:
1337             backtrackDotStarEnclosure(opIndex);
1338             break;
1339
1340         case PatternTerm::TypeBackReference:
1341             m_shouldFallBack = true;
1342             break;
1343         }
1344     }
1345
1346     void generate()
1347     {
1348         // Forwards generate the matching code.
1349         ASSERT(m_ops.size());
1350         size_t opIndex = 0;
1351
1352         do {
1353             YarrOp& op = m_ops[opIndex];
1354             switch (op.m_op) {
1355
1356             case OpTerm:
1357                 generateTerm(opIndex);
1358                 break;
1359
1360             // OpBodyAlternativeBegin/Next/End
1361             //
1362             // These nodes wrap the set of alternatives in the body of the regular expression.
1363             // There may be either one or two chains of OpBodyAlternative nodes, one representing
1364             // the 'once through' sequence of alternatives (if any exist), and one representing
1365             // the repeating alternatives (again, if any exist).
1366             //
1367             // Upon normal entry to the Begin alternative, we will check that input is available.
1368             // Reentry to the Begin alternative will take place after the check has taken place,
1369             // and will assume that the input position has already been progressed as appropriate.
1370             //
1371             // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1372             // successfully completed a match - return a success state from JIT code.
1373             //
1374             // Next alternatives allow for reentry optimized to suit backtracking from its
1375             // preceding alternative. It expects the input position to still be set to a position
1376             // appropriate to its predecessor, and it will only perform an input check if the
1377             // predecessor had a minimum size less than its own.
1378             //
1379             // In the case 'once through' expressions, the End node will also have a reentry
1380             // point to jump to when the last alternative fails. Again, this expects the input
1381             // position to still reflect that expected by the prior alternative.
1382             case OpBodyAlternativeBegin: {
1383                 PatternAlternative* alternative = op.m_alternative;
1384
1385                 // Upon entry at the head of the set of alternatives, check if input is available
1386                 // to run the first alternative. (This progresses the input position).
1387                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
1388                 // We will reenter after the check, and assume the input position to have been
1389                 // set as appropriate to this alternative.
1390                 op.m_reentry = label();
1391
1392                 m_checked += alternative->m_minimumSize;
1393                 break;
1394             }
1395             case OpBodyAlternativeNext:
1396             case OpBodyAlternativeEnd: {
1397                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1398                 PatternAlternative* alternative = op.m_alternative;
1399
1400                 // If we get here, the prior alternative matched - return success.
1401                 
1402                 // Adjust the stack pointer to remove the pattern's frame.
1403                 removeCallFrame();
1404
1405                 // Load appropriate values into the return register and the first output
1406                 // slot, and return. In the case of pattern with a fixed size, we will
1407                 // not have yet set the value in the first 
1408                 ASSERT(index != returnRegister);
1409                 if (m_pattern.m_body->m_hasFixedSize) {
1410                     move(index, returnRegister);
1411                     if (priorAlternative->m_minimumSize)
1412                         sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
1413                     if (compileMode == IncludeSubpatterns)
1414                         store32(returnRegister, output);
1415                 } else
1416                     getMatchStart(returnRegister);
1417                 if (compileMode == IncludeSubpatterns)
1418                     store32(index, Address(output, 4));
1419                 move(index, returnRegister2);
1420
1421                 generateReturn();
1422
1423                 // This is the divide between the tail of the prior alternative, above, and
1424                 // the head of the subsequent alternative, below.
1425
1426                 if (op.m_op == OpBodyAlternativeNext) {
1427                     // This is the reentry point for the Next alternative. We expect any code
1428                     // that jumps here to do so with the input position matching that of the
1429                     // PRIOR alteranative, and we will only check input availability if we
1430                     // need to progress it forwards.
1431                     op.m_reentry = label();
1432                     if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
1433                         add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
1434                         op.m_jumps.append(jumpIfNoAvailableInput());
1435                     } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
1436                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
1437                 } else if (op.m_nextOp == notFound) {
1438                     // This is the reentry point for the End of 'once through' alternatives,
1439                     // jumped to when the last alternative fails to match.
1440                     op.m_reentry = label();
1441                     sub32(Imm32(priorAlternative->m_minimumSize), index);
1442                 }
1443
1444                 if (op.m_op == OpBodyAlternativeNext)
1445                     m_checked += alternative->m_minimumSize;
1446                 m_checked -= priorAlternative->m_minimumSize;
1447                 break;
1448             }
1449
1450             // OpSimpleNestedAlternativeBegin/Next/End
1451             // OpNestedAlternativeBegin/Next/End
1452             //
1453             // These nodes are used to handle sets of alternatives that are nested within
1454             // subpatterns and parenthetical assertions. The 'simple' forms are used where
1455             // we do not need to be able to backtrack back into any alternative other than
1456             // the last, the normal forms allow backtracking into any alternative.
1457             //
1458             // Each Begin/Next node is responsible for planting an input check to ensure
1459             // sufficient input is available on entry. Next nodes additionally need to
1460             // jump to the end - Next nodes use the End node's m_jumps list to hold this
1461             // set of jumps.
1462             //
1463             // In the non-simple forms, successful alternative matches must store a
1464             // 'return address' using a DataLabelPtr, used to store the address to jump
1465             // to when backtracking, to get to the code for the appropriate alternative.
1466             case OpSimpleNestedAlternativeBegin:
1467             case OpNestedAlternativeBegin: {
1468                 PatternTerm* term = op.m_term;
1469                 PatternAlternative* alternative = op.m_alternative;
1470                 PatternDisjunction* disjunction = term->parentheses.disjunction;
1471
1472                 // Calculate how much input we need to check for, and if non-zero check.
1473                 op.m_checkAdjust = alternative->m_minimumSize;
1474                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1475                     op.m_checkAdjust -= disjunction->m_minimumSize;
1476                 if (op.m_checkAdjust)
1477                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1478
1479                 m_checked += op.m_checkAdjust;
1480                 break;
1481             }
1482             case OpSimpleNestedAlternativeNext:
1483             case OpNestedAlternativeNext: {
1484                 PatternTerm* term = op.m_term;
1485                 PatternAlternative* alternative = op.m_alternative;
1486                 PatternDisjunction* disjunction = term->parentheses.disjunction;
1487
1488                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1489                 if (op.m_op == OpNestedAlternativeNext) {
1490                     unsigned parenthesesFrameLocation = term->frameLocation;
1491                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
1492                     if (term->quantityType != QuantifierFixedCount)
1493                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1494                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1495                 }
1496
1497                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
1498                     // If the previous alternative matched without consuming characters then
1499                     // backtrack to try to match while consuming some input.
1500                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1501                 }
1502
1503                 // If we reach here then the last alternative has matched - jump to the
1504                 // End node, to skip over any further alternatives.
1505                 //
1506                 // FIXME: this is logically O(N^2) (though N can be expected to be very
1507                 // small). We could avoid this either by adding an extra jump to the JIT
1508                 // data structures, or by making backtracking code that jumps to Next
1509                 // alternatives are responsible for checking that input is available (if
1510                 // we didn't need to plant the input checks, then m_jumps would be free).
1511                 YarrOp* endOp = &m_ops[op.m_nextOp];
1512                 while (endOp->m_nextOp != notFound) {
1513                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1514                     endOp = &m_ops[endOp->m_nextOp];
1515                 }
1516                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1517                 endOp->m_jumps.append(jump());
1518
1519                 // This is the entry point for the next alternative.
1520                 op.m_reentry = label();
1521
1522                 // Calculate how much input we need to check for, and if non-zero check.
1523                 op.m_checkAdjust = alternative->m_minimumSize;
1524                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1525                     op.m_checkAdjust -= disjunction->m_minimumSize;
1526                 if (op.m_checkAdjust)
1527                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1528
1529                 YarrOp& lastOp = m_ops[op.m_previousOp];
1530                 m_checked -= lastOp.m_checkAdjust;
1531                 m_checked += op.m_checkAdjust;
1532                 break;
1533             }
1534             case OpSimpleNestedAlternativeEnd:
1535             case OpNestedAlternativeEnd: {
1536                 PatternTerm* term = op.m_term;
1537
1538                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1539                 if (op.m_op == OpNestedAlternativeEnd) {
1540                     unsigned parenthesesFrameLocation = term->frameLocation;
1541                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
1542                     if (term->quantityType != QuantifierFixedCount)
1543                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1544                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1545                 }
1546
1547                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
1548                     // If the previous alternative matched without consuming characters then
1549                     // backtrack to try to match while consuming some input.
1550                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1551                 }
1552
1553                 // If this set of alternatives contains more than one alternative,
1554                 // then the Next nodes will have planted jumps to the End, and added
1555                 // them to this node's m_jumps list.
1556                 op.m_jumps.link(this);
1557                 op.m_jumps.clear();
1558
1559                 YarrOp& lastOp = m_ops[op.m_previousOp];
1560                 m_checked -= lastOp.m_checkAdjust;
1561                 break;
1562             }
1563
1564             // OpParenthesesSubpatternOnceBegin/End
1565             //
1566             // These nodes support (optionally) capturing subpatterns, that have a
1567             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 
1568             case OpParenthesesSubpatternOnceBegin: {
1569                 PatternTerm* term = op.m_term;
1570                 unsigned parenthesesFrameLocation = term->frameLocation;
1571                 const RegisterID indexTemporary = regT0;
1572                 ASSERT(term->quantityCount == 1);
1573
1574                 // Upon entry to a Greedy quantified set of parentheses store the index.
1575                 // We'll use this for two purposes:
1576                 //  - To indicate which iteration we are on of matching the remainder of
1577                 //    the expression after the parentheses - the first, including the
1578                 //    match within the parentheses, or the second having skipped over them.
1579                 //  - To check for empty matches, which must be rejected.
1580                 //
1581                 // At the head of a NonGreedy set of parentheses we'll immediately set the
1582                 // value on the stack to -1 (indicating a match skipping the subpattern),
1583                 // and plant a jump to the end. We'll also plant a label to backtrack to
1584                 // to reenter the subpattern later, with a store to set up index on the
1585                 // second iteration.
1586                 //
1587                 // FIXME: for capturing parens, could use the index in the capture array?
1588                 if (term->quantityType == QuantifierGreedy)
1589                     storeToFrame(index, parenthesesFrameLocation);
1590                 else if (term->quantityType == QuantifierNonGreedy) {
1591                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1592                     op.m_jumps.append(jump());
1593                     op.m_reentry = label();
1594                     storeToFrame(index, parenthesesFrameLocation);
1595                 }
1596
1597                 // If the parentheses are capturing, store the starting index value to the
1598                 // captures array, offsetting as necessary.
1599                 //
1600                 // FIXME: could avoid offsetting this value in JIT code, apply
1601                 // offsets only afterwards, at the point the results array is
1602                 // being accessed.
1603                 if (term->capture() && compileMode == IncludeSubpatterns) {
1604                     int inputOffset = term->inputPosition - m_checked;
1605                     if (term->quantityType == QuantifierFixedCount)
1606                         inputOffset -= term->parentheses.disjunction->m_minimumSize;
1607                     if (inputOffset) {
1608                         move(index, indexTemporary);
1609                         add32(Imm32(inputOffset), indexTemporary);
1610                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
1611                     } else
1612                         setSubpatternStart(index, term->parentheses.subpatternId);
1613                 }
1614                 break;
1615             }
1616             case OpParenthesesSubpatternOnceEnd: {
1617                 PatternTerm* term = op.m_term;
1618                 const RegisterID indexTemporary = regT0;
1619                 ASSERT(term->quantityCount == 1);
1620
1621 #ifndef NDEBUG
1622                 // Runtime ASSERT to make sure that the nested alternative handled the
1623                 // "no input consumed" check.
1624                 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
1625                     Jump pastBreakpoint;
1626                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1627                     breakpoint();
1628                     pastBreakpoint.link(this);
1629                 }
1630 #endif
1631
1632                 // If the parentheses are capturing, store the ending index value to the
1633                 // captures array, offsetting as necessary.
1634                 //
1635                 // FIXME: could avoid offsetting this value in JIT code, apply
1636                 // offsets only afterwards, at the point the results array is
1637                 // being accessed.
1638                 if (term->capture() && compileMode == IncludeSubpatterns) {
1639                     int inputOffset = term->inputPosition - m_checked;
1640                     if (inputOffset) {
1641                         move(index, indexTemporary);
1642                         add32(Imm32(inputOffset), indexTemporary);
1643                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
1644                     } else
1645                         setSubpatternEnd(index, term->parentheses.subpatternId);
1646                 }
1647
1648                 // If the parentheses are quantified Greedy then add a label to jump back
1649                 // to if get a failed match from after the parentheses. For NonGreedy
1650                 // parentheses, link the jump from before the subpattern to here.
1651                 if (term->quantityType == QuantifierGreedy)
1652                     op.m_reentry = label();
1653                 else if (term->quantityType == QuantifierNonGreedy) {
1654                     YarrOp& beginOp = m_ops[op.m_previousOp];
1655                     beginOp.m_jumps.link(this);
1656                 }
1657                 break;
1658             }
1659
1660             // OpParenthesesSubpatternTerminalBegin/End
1661             case OpParenthesesSubpatternTerminalBegin: {
1662                 PatternTerm* term = op.m_term;
1663                 ASSERT(term->quantityType == QuantifierGreedy);
1664                 ASSERT(term->quantityCount == quantifyInfinite);
1665                 ASSERT(!term->capture());
1666
1667                 // Upon entry set a label to loop back to.
1668                 op.m_reentry = label();
1669
1670                 // Store the start index of the current match; we need to reject zero
1671                 // length matches.
1672                 storeToFrame(index, term->frameLocation);
1673                 break;
1674             }
1675             case OpParenthesesSubpatternTerminalEnd: {
1676                 YarrOp& beginOp = m_ops[op.m_previousOp];
1677 #ifndef NDEBUG
1678                 PatternTerm* term = op.m_term;
1679
1680                 // Runtime ASSERT to make sure that the nested alternative handled the
1681                 // "no input consumed" check.
1682                 Jump pastBreakpoint;
1683                 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
1684                 breakpoint();
1685                 pastBreakpoint.link(this);
1686 #endif
1687
1688                 // We know that the match is non-zero, we can accept it and
1689                 // loop back up to the head of the subpattern.
1690                 jump(beginOp.m_reentry);
1691
1692                 // This is the entry point to jump to when we stop matching - we will
1693                 // do so once the subpattern cannot match any more.
1694                 op.m_reentry = label();
1695                 break;
1696             }
1697
1698             // OpParentheticalAssertionBegin/End
1699             case OpParentheticalAssertionBegin: {
1700                 PatternTerm* term = op.m_term;
1701
1702                 // Store the current index - assertions should not update index, so
1703                 // we will need to restore it upon a successful match.
1704                 unsigned parenthesesFrameLocation = term->frameLocation;
1705                 storeToFrame(index, parenthesesFrameLocation);
1706
1707                 // Check 
1708                 op.m_checkAdjust = m_checked - term->inputPosition;
1709                 if (op.m_checkAdjust)
1710                     sub32(Imm32(op.m_checkAdjust), index);
1711
1712                 m_checked -= op.m_checkAdjust;
1713                 break;
1714             }
1715             case OpParentheticalAssertionEnd: {
1716                 PatternTerm* term = op.m_term;
1717
1718                 // Restore the input index value.
1719                 unsigned parenthesesFrameLocation = term->frameLocation;
1720                 loadFromFrame(parenthesesFrameLocation, index);
1721
1722                 // If inverted, a successful match of the assertion must be treated
1723                 // as a failure, so jump to backtracking.
1724                 if (term->invert()) {
1725                     op.m_jumps.append(jump());
1726                     op.m_reentry = label();
1727                 }
1728
1729                 YarrOp& lastOp = m_ops[op.m_previousOp];
1730                 m_checked += lastOp.m_checkAdjust;
1731                 break;
1732             }
1733
1734             case OpMatchFailed:
1735                 removeCallFrame();
1736                 move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
1737                 move(TrustedImm32(0), returnRegister2);
1738                 generateReturn();
1739                 break;
1740             }
1741
1742             ++opIndex;
1743         } while (opIndex < m_ops.size());
1744     }
1745
1746     void backtrack()
1747     {
1748         // Backwards generate the backtracking code.
1749         size_t opIndex = m_ops.size();
1750         ASSERT(opIndex);
1751
1752         do {
1753             --opIndex;
1754             YarrOp& op = m_ops[opIndex];
1755             switch (op.m_op) {
1756
1757             case OpTerm:
1758                 backtrackTerm(opIndex);
1759                 break;
1760
1761             // OpBodyAlternativeBegin/Next/End
1762             //
1763             // For each Begin/Next node representing an alternative, we need to decide what to do
1764             // in two circumstances:
1765             //  - If we backtrack back into this node, from within the alternative.
1766             //  - If the input check at the head of the alternative fails (if this exists).
1767             //
1768             // We treat these two cases differently since in the former case we have slightly
1769             // more information - since we are backtracking out of a prior alternative we know
1770             // that at least enough input was available to run it. For example, given the regular
1771             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1772             // character match of 'a'), then we need not perform an additional input availability
1773             // check before running the second alternative.
1774             //
1775             // Backtracking required differs for the last alternative, which in the case of the
1776             // repeating set of alternatives must loop. The code generated for the last alternative
1777             // will also be used to handle all input check failures from any prior alternatives -
1778             // these require similar functionality, in seeking the next available alternative for
1779             // which there is sufficient input.
1780             //
1781             // Since backtracking of all other alternatives simply requires us to link backtracks
1782             // to the reentry point for the subsequent alternative, we will only be generating any
1783             // code when backtracking the last alternative.
1784             case OpBodyAlternativeBegin:
1785             case OpBodyAlternativeNext: {
1786                 PatternAlternative* alternative = op.m_alternative;
1787
1788                 if (op.m_op == OpBodyAlternativeNext) {
1789                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1790                     m_checked += priorAlternative->m_minimumSize;
1791                 }
1792                 m_checked -= alternative->m_minimumSize;
1793
1794                 // Is this the last alternative? If not, then if we backtrack to this point we just
1795                 // need to jump to try to match the next alternative.
1796                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
1797                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
1798                     break;
1799                 }
1800                 YarrOp& endOp = m_ops[op.m_nextOp];
1801
1802                 YarrOp* beginOp = &op;
1803                 while (beginOp->m_op != OpBodyAlternativeBegin) {
1804                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
1805                     beginOp = &m_ops[beginOp->m_previousOp];
1806                 }
1807
1808                 bool onceThrough = endOp.m_nextOp == notFound;
1809
1810                 // First, generate code to handle cases where we backtrack out of an attempted match
1811                 // of the last alternative. If this is a 'once through' set of alternatives then we
1812                 // have nothing to do - link this straight through to the End.
1813                 if (onceThrough)
1814                     m_backtrackingState.linkTo(endOp.m_reentry, this);
1815                 else {
1816                     // If we don't need to move the input position, and the pattern has a fixed size
1817                     // (in which case we omit the store of the start index until the pattern has matched)
1818                     // then we can just link the backtrack out of the last alternative straight to the
1819                     // head of the first alternative.
1820                     if (m_pattern.m_body->m_hasFixedSize
1821                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
1822                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
1823                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
1824                     else {
1825                         // We need to generate a trampoline of code to execute before looping back
1826                         // around to the first alternative.
1827                         m_backtrackingState.link(this);
1828
1829                         // If the pattern size is not fixed, then store the start index, for use if we match.
1830                         if (!m_pattern.m_body->m_hasFixedSize) {
1831                             if (alternative->m_minimumSize == 1)
1832                                 setMatchStart(index);
1833                             else {
1834                                 move(index, regT0);
1835                                 if (alternative->m_minimumSize)
1836                                     sub32(Imm32(alternative->m_minimumSize - 1), regT0);
1837                                 else
1838                                     add32(TrustedImm32(1), regT0);
1839                                 setMatchStart(regT0);
1840                             }
1841                         }
1842
1843                         // Generate code to loop. Check whether the last alternative is longer than the
1844                         // first (e.g. /a|xy/ or /a|xyz/).
1845                         if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
1846                             // We want to loop, and increment input position. If the delta is 1, it is
1847                             // already correctly incremented, if more than one then decrement as appropriate.
1848                             unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
1849                             ASSERT(delta);
1850                             if (delta != 1)
1851                                 sub32(Imm32(delta - 1), index);
1852                             jump(beginOp->m_reentry);
1853                         } else {
1854                             // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1855                             // be sufficient input available to handle this, so just fall through.
1856                             unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
1857                             if (delta != 0xFFFFFFFFu) {
1858                                 // We need to check input because we are incrementing the input.
1859                                 add32(Imm32(delta + 1), index);
1860                                 checkInput().linkTo(beginOp->m_reentry, this);
1861                             }
1862                         }
1863                     }
1864                 }
1865
1866                 // We can reach this point in the code in two ways:
1867                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
1868                 //    last alternative, and did not have sufficient input to run the first).
1869                 //  - We will loop back up to the following label when a repeating alternative loops,
1870                 //    following a failed input check.
1871                 //
1872                 // Either way, we have just failed the input check for the first alternative.
1873                 Label firstInputCheckFailed(this);
1874
1875                 // Generate code to handle input check failures from alternatives except the last.
1876                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
1877                 // nextOp is the alternative we will be attempting to reenter into.
1878                 // 
1879                 // We will link input check failures from the forwards matching path back to the code
1880                 // that can handle them.
1881                 YarrOp* prevOp = beginOp;
1882                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
1883                 while (nextOp->m_op != OpBodyAlternativeEnd) {
1884                     prevOp->m_jumps.link(this);
1885
1886                     // We only get here if an input check fails, it is only worth checking again
1887                     // if the next alternative has a minimum size less than the last.
1888                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
1889                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1890                         // subtract delta back out, and reduce this code. Should performance test
1891                         // the benefit of this.
1892                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
1893                         sub32(Imm32(delta), index);
1894                         Jump fail = jumpIfNoAvailableInput();
1895                         add32(Imm32(delta), index);
1896                         jump(nextOp->m_reentry);
1897                         fail.link(this);
1898                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
1899                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
1900                     prevOp = nextOp;
1901                     nextOp = &m_ops[nextOp->m_nextOp];
1902                 }
1903
1904                 // We fall through to here if there is insufficient input to run the last alternative.
1905
1906                 // If there is insufficient input to run the last alternative, then for 'once through'
1907                 // alternatives we are done - just jump back up into the forwards matching path at the End.
1908                 if (onceThrough) {
1909                     op.m_jumps.linkTo(endOp.m_reentry, this);
1910                     jump(endOp.m_reentry);
1911                     break;
1912                 }
1913
1914                 // For repeating alternatives, link any input check failure from the last alternative to
1915                 // this point.
1916                 op.m_jumps.link(this);
1917
1918                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
1919
1920                 // Check for cases where input position is already incremented by 1 for the last
1921                 // alternative (this is particularly useful where the minimum size of the body
1922                 // disjunction is 0, e.g. /a*|b/).
1923                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
1924                     // index is already incremented by 1, so just store it now!
1925                     setMatchStart(index);
1926                     needsToUpdateMatchStart = false;
1927                 }
1928
1929                 // Check whether there is sufficient input to loop. Increment the input position by
1930                 // one, and check. Also add in the minimum disjunction size before checking - there
1931                 // is no point in looping if we're just going to fail all the input checks around
1932                 // the next iteration.
1933                 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
1934                 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
1935                     // If the last alternative had the same minimum size as the disjunction,
1936                     // just simply increment input pos by 1, no adjustment based on minimum size.
1937                     add32(TrustedImm32(1), index);
1938                 } else {
1939                     // If the minimum for the last alternative was one greater than that
1940                     // for the disjunction, we're already progressed by 1, nothing to do!
1941                     unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
1942                     if (delta)
1943                         sub32(Imm32(delta), index);
1944                 }
1945                 Jump matchFailed = jumpIfNoAvailableInput();
1946
1947                 if (needsToUpdateMatchStart) {
1948                     if (!m_pattern.m_body->m_minimumSize)
1949                         setMatchStart(index);
1950                     else {
1951                         move(index, regT0);
1952                         sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
1953                         setMatchStart(regT0);
1954                     }
1955                 }
1956
1957                 // Calculate how much more input the first alternative requires than the minimum
1958                 // for the body as a whole. If no more is needed then we dont need an additional
1959                 // input check here - jump straight back up to the start of the first alternative.
1960                 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
1961                     jump(beginOp->m_reentry);
1962                 else {
1963                     if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
1964                         add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
1965                     else
1966                         sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
1967                     checkInput().linkTo(beginOp->m_reentry, this);
1968                     jump(firstInputCheckFailed);
1969                 }
1970
1971                 // We jump to here if we iterate to the point that there is insufficient input to
1972                 // run any matches, and need to return a failure state from JIT code.
1973                 matchFailed.link(this);
1974
1975                 removeCallFrame();
1976                 move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
1977                 move(TrustedImm32(0), returnRegister2);
1978                 generateReturn();
1979                 break;
1980             }
1981             case OpBodyAlternativeEnd: {
1982                 // We should never backtrack back into a body disjunction.
1983                 ASSERT(m_backtrackingState.isEmpty());
1984
1985                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1986                 m_checked += priorAlternative->m_minimumSize;
1987                 break;
1988             }
1989
1990             // OpSimpleNestedAlternativeBegin/Next/End
1991             // OpNestedAlternativeBegin/Next/End
1992             //
1993             // Generate code for when we backtrack back out of an alternative into
1994             // a Begin or Next node, or when the entry input count check fails. If
1995             // there are more alternatives we need to jump to the next alternative,
1996             // if not we backtrack back out of the current set of parentheses.
1997             //
1998             // In the case of non-simple nested assertions we need to also link the
1999             // 'return address' appropriately to backtrack back out into the correct
2000             // alternative.
2001             case OpSimpleNestedAlternativeBegin:
2002             case OpSimpleNestedAlternativeNext:
2003             case OpNestedAlternativeBegin:
2004             case OpNestedAlternativeNext: {
2005                 YarrOp& nextOp = m_ops[op.m_nextOp];
2006                 bool isBegin = op.m_previousOp == notFound;
2007                 bool isLastAlternative = nextOp.m_nextOp == notFound;
2008                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
2009                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
2010
2011                 // Treat an input check failure the same as a failed match.
2012                 m_backtrackingState.append(op.m_jumps);
2013
2014                 // Set the backtracks to jump to the appropriate place. We may need
2015                 // to link the backtracks in one of three different way depending on
2016                 // the type of alternative we are dealing with:
                //  - A single alternative, with no siblings.
2018                 //  - The last alternative of a set of two or more.
2019                 //  - An alternative other than the last of a set of two or more.
2020                 //
2021                 // In the case of a single alternative on its own, we don't need to
2022                 // jump anywhere - if the alternative fails to match we can just
2023                 // continue to backtrack out of the parentheses without jumping.
2024                 //
2025                 // In the case of the last alternative in a set of more than one, we
2026                 // need to jump to return back out to the beginning. We'll do so by
2027                 // adding a jump to the End node's m_jumps list, and linking this
2028                 // when we come to generate the Begin node. For alternatives other
2029                 // than the last, we need to jump to the next alternative.
2030                 //
2031                 // If the alternative had adjusted the input position we must link
2032                 // backtracking to here, correct, and then jump on. If not we can
2033                 // link the backtracks directly to their destination.
2034                 if (op.m_checkAdjust) {
2035                     // Handle the cases where we need to link the backtracks here.
2036                     m_backtrackingState.link(this);
2037                     sub32(Imm32(op.m_checkAdjust), index);
2038                     if (!isLastAlternative) {
2039                         // An alternative that is not the last should jump to its successor.
2040                         jump(nextOp.m_reentry);
2041                     } else if (!isBegin) {
2042                         // The last of more than one alternatives must jump back to the beginning.
2043                         nextOp.m_jumps.append(jump());
2044                     } else {
2045                         // A single alternative on its own can fall through.
2046                         m_backtrackingState.fallthrough();
2047                     }
2048                 } else {
2049                     // Handle the cases where we can link the backtracks directly to their destinations.
2050                     if (!isLastAlternative) {
2051                         // An alternative that is not the last should jump to its successor.
2052                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
2053                     } else if (!isBegin) {
2054                         // The last of more than one alternatives must jump back to the beginning.
2055                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
2056                     }
2057                     // In the case of a single alternative on its own do nothing - it can fall through.
2058                 }
2059
2060                 // If there is a backtrack jump from a zero length match link it here.
2061                 if (op.m_zeroLengthMatch.isSet())
2062                     m_backtrackingState.append(op.m_zeroLengthMatch);
2063
2064                 // At this point we've handled the backtracking back into this node.
2065                 // Now link any backtracks that need to jump to here.
2066
2067                 // For non-simple alternatives, link the alternative's 'return address'
2068                 // so that we backtrack back out into the previous alternative.
2069                 if (op.m_op == OpNestedAlternativeNext)
2070                     m_backtrackingState.append(op.m_returnAddress);
2071
2072                 // If there is more than one alternative, then the last alternative will
2073                 // have planted a jump to be linked to the end. This jump was added to the
2074                 // End node's m_jumps list. If we are back at the beginning, link it here.
2075                 if (isBegin) {
2076                     YarrOp* endOp = &m_ops[op.m_nextOp];
2077                     while (endOp->m_nextOp != notFound) {
2078                         ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2079                         endOp = &m_ops[endOp->m_nextOp];
2080                     }
2081                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2082                     m_backtrackingState.append(endOp->m_jumps);
2083                 }
2084
2085                 if (!isBegin) {
2086                     YarrOp& lastOp = m_ops[op.m_previousOp];
2087                     m_checked += lastOp.m_checkAdjust;
2088                 }
2089                 m_checked -= op.m_checkAdjust;
2090                 break;
2091             }
2092             case OpSimpleNestedAlternativeEnd:
2093             case OpNestedAlternativeEnd: {
2094                 PatternTerm* term = op.m_term;
2095
2096                 // If there is a backtrack jump from a zero length match link it here.
2097                 if (op.m_zeroLengthMatch.isSet())
2098                     m_backtrackingState.append(op.m_zeroLengthMatch);
2099
2100                 // If we backtrack into the end of a simple subpattern do nothing;
2101                 // just continue through into the last alternative. If we backtrack
                // into the end of a non-simple set of alternatives we need to jump
2103                 // to the backtracking return address set up during generation.
2104                 if (op.m_op == OpNestedAlternativeEnd) {
2105                     m_backtrackingState.link(this);
2106
2107                     // Plant a jump to the return address.
2108                     unsigned parenthesesFrameLocation = term->frameLocation;
2109                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
2110                     if (term->quantityType != QuantifierFixedCount)
2111                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2112                     loadFromFrameAndJump(alternativeFrameLocation);
2113
2114                     // Link the DataLabelPtr associated with the end of the last
2115                     // alternative to this point.
2116                     m_backtrackingState.append(op.m_returnAddress);
2117                 }
2118
2119                 YarrOp& lastOp = m_ops[op.m_previousOp];
2120                 m_checked += lastOp.m_checkAdjust;
2121                 break;
2122             }
2123
2124             // OpParenthesesSubpatternOnceBegin/End
2125             //
2126             // When we are backtracking back out of a capturing subpattern we need
2127             // to clear the start index in the matches output array, to record that
2128             // this subpattern has not been captured.
2129             //
2130             // When backtracking back out of a Greedy quantified subpattern we need
2131             // to catch this, and try running the remainder of the alternative after
2132             // the subpattern again, skipping the parentheses.
2133             //
2134             // Upon backtracking back into a quantified set of parentheses we need to
2135             // check whether we were currently skipping the subpattern. If not, we
2136             // can backtrack into them, if we were we need to either backtrack back
2137             // out of the start of the parentheses, or jump back to the forwards
2138             // matching start, depending of whether the match is Greedy or NonGreedy.
2139             case OpParenthesesSubpatternOnceBegin: {
2140                 PatternTerm* term = op.m_term;
2141                 ASSERT(term->quantityCount == 1);
2142
                // We only need to backtrack to this point if capturing or greedy.
2144                 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
2145                     m_backtrackingState.link(this);
2146
2147                     // If capturing, clear the capture (we only need to reset start).
2148                     if (term->capture() && compileMode == IncludeSubpatterns)
2149                         clearSubpatternStart(term->parentheses.subpatternId);
2150
2151                     // If Greedy, jump to the end.
2152                     if (term->quantityType == QuantifierGreedy) {
2153                         // Clear the flag in the stackframe indicating we ran through the subpattern.
2154                         unsigned parenthesesFrameLocation = term->frameLocation;
2155                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
2156                         // Jump to after the parentheses, skipping the subpattern.
2157                         jump(m_ops[op.m_nextOp].m_reentry);
2158                         // A backtrack from after the parentheses, when skipping the subpattern,
2159                         // will jump back to here.
2160                         op.m_jumps.link(this);
2161                     }
2162
2163                     m_backtrackingState.fallthrough();
2164                 }
2165                 break;
2166             }
2167             case OpParenthesesSubpatternOnceEnd: {
2168                 PatternTerm* term = op.m_term;
2169
2170                 if (term->quantityType != QuantifierFixedCount) {
2171                     m_backtrackingState.link(this);
2172
2173                     // Check whether we should backtrack back into the parentheses, or if we
2174                     // are currently in a state where we had skipped over the subpattern
2175                     // (in which case the flag value on the stack will be -1).
2176                     unsigned parenthesesFrameLocation = term->frameLocation;
2177                     Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
2178
2179                     if (term->quantityType == QuantifierGreedy) {
2180                         // For Greedy parentheses, we skip after having already tried going
2181                         // through the subpattern, so if we get here we're done.
2182                         YarrOp& beginOp = m_ops[op.m_previousOp];
2183                         beginOp.m_jumps.append(hadSkipped);
2184                     } else {
2185                         // For NonGreedy parentheses, we try skipping the subpattern first,
2186                         // so if we get here we need to try running through the subpattern
2187                         // next. Jump back to the start of the parentheses in the forwards
2188                         // matching path.
2189                         ASSERT(term->quantityType == QuantifierNonGreedy);
2190                         YarrOp& beginOp = m_ops[op.m_previousOp];
2191                         hadSkipped.linkTo(beginOp.m_reentry, this);
2192                     }
2193
2194                     m_backtrackingState.fallthrough();
2195                 }
2196
2197                 m_backtrackingState.append(op.m_jumps);
2198                 break;
2199             }
2200
2201             // OpParenthesesSubpatternTerminalBegin/End
2202             //
2203             // Terminal subpatterns will always match - there is nothing after them to
2204             // force a backtrack, and they have a minimum count of 0, and as such will
2205             // always produce an acceptable result.
2206             case OpParenthesesSubpatternTerminalBegin: {
2207                 // We will backtrack to this point once the subpattern cannot match any
2208                 // more. Since no match is accepted as a successful match (we are Greedy
2209                 // quantified with a minimum of zero) jump back to the forwards matching
2210                 // path at the end.
2211                 YarrOp& endOp = m_ops[op.m_nextOp];
2212                 m_backtrackingState.linkTo(endOp.m_reentry, this);
2213                 break;
2214             }
2215             case OpParenthesesSubpatternTerminalEnd:
2216                 // We should never be backtracking to here (hence the 'terminal' in the name).
2217                 ASSERT(m_backtrackingState.isEmpty());
2218                 m_backtrackingState.append(op.m_jumps);
2219                 break;
2220
2221             // OpParentheticalAssertionBegin/End
2222             case OpParentheticalAssertionBegin: {
2223                 PatternTerm* term = op.m_term;
2224                 YarrOp& endOp = m_ops[op.m_nextOp];
2225
2226                 // We need to handle the backtracks upon backtracking back out
2227                 // of a parenthetical assertion if either we need to correct
2228                 // the input index, or the assertion was inverted.
2229                 if (op.m_checkAdjust || term->invert()) {
2230                      m_backtrackingState.link(this);
2231
2232                     if (op.m_checkAdjust)
2233                         add32(Imm32(op.m_checkAdjust), index);
2234
2235                     // In an inverted assertion failure to match the subpattern
2236                     // is treated as a successful match - jump to the end of the
2237                     // subpattern. We already have adjusted the input position
2238                     // back to that before the assertion, which is correct.
2239                     if (term->invert())
2240                         jump(endOp.m_reentry);
2241
2242                     m_backtrackingState.fallthrough();
2243                 }
2244
2245                 // The End node's jump list will contain any backtracks into
2246                 // the end of the assertion. Also, if inverted, we will have
2247                 // added the failure caused by a successful match to this.
2248                 m_backtrackingState.append(endOp.m_jumps);
2249
2250                 m_checked += op.m_checkAdjust;
2251                 break;
2252             }
2253             case OpParentheticalAssertionEnd: {
2254                 // FIXME: We should really be clearing any nested subpattern
2255                 // matches on bailing out from after the pattern. Firefox has
2256                 // this bug too (presumably because they use YARR!)
2257
2258                 // Never backtrack into an assertion; later failures bail to before the begin.
2259                 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2260
2261                 YarrOp& lastOp = m_ops[op.m_previousOp];
2262                 m_checked -= lastOp.m_checkAdjust;
2263                 break;
2264             }
2265
2266             case OpMatchFailed:
2267                 break;
2268             }
2269
2270         } while (opIndex);
2271     }
2272
2273     // Compilation methods:
2274     // ====================
2275
2276     // opCompileParenthesesSubpattern
2277     // Emits ops for a subpattern (set of parentheses). These consist
2278     // of a set of alternatives wrapped in an outer set of nodes for
2279     // the parentheses.
2280     // Supported types of parentheses are 'Once' (quantityCount == 1)
2281     // and 'Terminal' (non-capturing parentheses quantified as greedy
2282     // and infinite).
2283     // Alternatives will use the 'Simple' set of ops if either the
2284     // subpattern is terminal (in which case we will never need to
2285     // backtrack), or if the subpattern only contains one alternative.
2286     void opCompileParenthesesSubpattern(PatternTerm* term)
2287     {
2288         YarrOpCode parenthesesBeginOpCode;
2289         YarrOpCode parenthesesEndOpCode;
2290         YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
2291         YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
2292         YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
2293
2294         // We can currently only compile quantity 1 subpatterns that are
2295         // not copies. We generate a copy in the case of a range quantifier,
2296         // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2297         // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
2298         // comes where the subpattern is capturing, in which case we would
2299         // need to restore the capture from the first subpattern upon a
2300         // failure in the second.
2301         if (term->quantityCount == 1 && !term->parentheses.isCopy) {
2302             // Select the 'Once' nodes.
2303             parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
2304             parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
2305
2306             // If there is more than one alternative we cannot use the 'simple' nodes.
2307             if (term->parentheses.disjunction->m_alternatives.size() != 1) {
2308                 alternativeBeginOpCode = OpNestedAlternativeBegin;
2309                 alternativeNextOpCode = OpNestedAlternativeNext;
2310                 alternativeEndOpCode = OpNestedAlternativeEnd;
2311             }
2312         } else if (term->parentheses.isTerminal) {
2313             // Select the 'Terminal' nodes.
2314             parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
2315             parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
2316         } else {
2317             // This subpattern is not supported by the JIT.
2318             m_shouldFallBack = true;
2319             return;
2320         }
2321
2322         size_t parenBegin = m_ops.size();
2323         m_ops.append(parenthesesBeginOpCode);
2324
2325         m_ops.append(alternativeBeginOpCode);
2326         m_ops.last().m_previousOp = notFound;
2327         m_ops.last().m_term = term;
2328         Vector<OwnPtr<PatternAlternative> >& alternatives =  term->parentheses.disjunction->m_alternatives;
2329         for (unsigned i = 0; i < alternatives.size(); ++i) {
2330             size_t lastOpIndex = m_ops.size() - 1;
2331
2332             PatternAlternative* nestedAlternative = alternatives[i].get();
2333             opCompileAlternative(nestedAlternative);
2334
2335             size_t thisOpIndex = m_ops.size();
2336             m_ops.append(YarrOp(alternativeNextOpCode));
2337
2338             YarrOp& lastOp = m_ops[lastOpIndex];
2339             YarrOp& thisOp = m_ops[thisOpIndex];
2340
2341             lastOp.m_alternative = nestedAlternative;
2342             lastOp.m_nextOp = thisOpIndex;
2343             thisOp.m_previousOp = lastOpIndex;
2344             thisOp.m_term = term;
2345         }
2346         YarrOp& lastOp = m_ops.last();
2347         ASSERT(lastOp.m_op == alternativeNextOpCode);
2348         lastOp.m_op = alternativeEndOpCode;
2349         lastOp.m_alternative = 0;
2350         lastOp.m_nextOp = notFound;
2351
2352         size_t parenEnd = m_ops.size();
2353         m_ops.append(parenthesesEndOpCode);
2354
2355         m_ops[parenBegin].m_term = term;
2356         m_ops[parenBegin].m_previousOp = notFound;
2357         m_ops[parenBegin].m_nextOp = parenEnd;
2358         m_ops[parenEnd].m_term = term;
2359         m_ops[parenEnd].m_previousOp = parenBegin;
2360         m_ops[parenEnd].m_nextOp = notFound;
2361     }
2362
2363     // opCompileParentheticalAssertion
2364     // Emits ops for a parenthetical assertion. These consist of an
2365     // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2366     // the alternatives, with these wrapped by an outer pair of
2367     // OpParentheticalAssertionBegin/End nodes.
2368     // We can always use the OpSimpleNestedAlternative nodes in the
2369     // case of parenthetical assertions since these only ever match
2370     // once, and will never backtrack back into the assertion.
2371     void opCompileParentheticalAssertion(PatternTerm* term)
2372     {
2373         size_t parenBegin = m_ops.size();
2374         m_ops.append(OpParentheticalAssertionBegin);
2375
2376         m_ops.append(OpSimpleNestedAlternativeBegin);
2377         m_ops.last().m_previousOp = notFound;
2378         m_ops.last().m_term = term;
2379         Vector<OwnPtr<PatternAlternative> >& alternatives =  term->parentheses.disjunction->m_alternatives;
2380         for (unsigned i = 0; i < alternatives.size(); ++i) {
2381             size_t lastOpIndex = m_ops.size() - 1;
2382
2383             PatternAlternative* nestedAlternative = alternatives[i].get();
2384             opCompileAlternative(nestedAlternative);
2385
2386             size_t thisOpIndex = m_ops.size();
2387             m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2388
2389             YarrOp& lastOp = m_ops[lastOpIndex];
2390             YarrOp& thisOp = m_ops[thisOpIndex];
2391
2392             lastOp.m_alternative = nestedAlternative;
2393             lastOp.m_nextOp = thisOpIndex;
2394             thisOp.m_previousOp = lastOpIndex;
2395             thisOp.m_term = term;
2396         }
2397         YarrOp& lastOp = m_ops.last();
2398         ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2399         lastOp.m_op = OpSimpleNestedAlternativeEnd;
2400         lastOp.m_alternative = 0;
2401         lastOp.m_nextOp = notFound;
2402
2403         size_t parenEnd = m_ops.size();
2404         m_ops.append(OpParentheticalAssertionEnd);
2405
2406         m_ops[parenBegin].m_term = term;
2407         m_ops[parenBegin].m_previousOp = notFound;
2408         m_ops[parenBegin].m_nextOp = parenEnd;
2409         m_ops[parenEnd].m_term = term;
2410         m_ops[parenEnd].m_previousOp = parenBegin;
2411         m_ops[parenEnd].m_nextOp = notFound;
2412     }
2413
2414     // opCompileAlternative
2415     // Called to emit nodes for all terms in an alternative.
2416     void opCompileAlternative(PatternAlternative* alternative)
2417     {
2418         optimizeAlternative(alternative);
2419
2420         for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2421             PatternTerm* term = &alternative->m_terms[i];
2422
2423             switch (term->type) {
2424             case PatternTerm::TypeParenthesesSubpattern:
2425                 opCompileParenthesesSubpattern(term);
2426                 break;
2427
2428             case PatternTerm::TypeParentheticalAssertion:
2429                 opCompileParentheticalAssertion(term);
2430                 break;
2431
2432             default:
2433                 m_ops.append(term);
2434             }
2435         }
2436     }
2437
2438     // opCompileBody
2439     // This method compiles the body disjunction of the regular expression.
2440     // The body consists of two sets of alternatives - zero or more 'once
2441     // through' (BOL anchored) alternatives, followed by zero or more
2442     // repeated alternatives.
    // For each of these two sets of alternatives, if not empty they will be
2444     // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
2445     // 'begin' node referencing the first alternative, and 'next' nodes
2446     // referencing any further alternatives. The begin/next/end nodes are
2447     // linked together in a doubly linked list. In the case of repeating
2448     // alternatives, the end node is also linked back to the beginning.
2449     // If no repeating alternatives exist, then a OpMatchFailed node exists
2450     // to return the failing result.
    void opCompileBody(PatternDisjunction* disjunction)
    {
        Vector<OwnPtr<PatternAlternative> >& alternatives = disjunction->m_alternatives;
        size_t currentAlternativeIndex = 0;

        // Emit the 'once through' alternatives.
        if (alternatives.size() && alternatives[0]->onceThrough()) {
            m_ops.append(YarrOp(OpBodyAlternativeBegin));
            m_ops.last().m_previousOp = notFound;

            do {
                size_t lastOpIndex = m_ops.size() - 1;
                PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
                opCompileAlternative(alternative);

                size_t thisOpIndex = m_ops.size();
                m_ops.append(YarrOp(OpBodyAlternativeNext));

                // References are taken only after the append above, since
                // growing m_ops may invalidate earlier references.
                YarrOp& lastOp = m_ops[lastOpIndex];
                YarrOp& thisOp = m_ops[thisOpIndex];

                // Link the prior node forward to this one, and this one back.
                lastOp.m_alternative = alternative;
                lastOp.m_nextOp = thisOpIndex;
                thisOp.m_previousOp = lastOpIndex;

                ++currentAlternativeIndex;
            } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());

            // The last node emitted was a trailing 'Next'; repurpose it as the
            // End node for the once-through set.
            YarrOp& lastOp = m_ops.last();

            ASSERT(lastOp.m_op == OpBodyAlternativeNext);
            lastOp.m_op = OpBodyAlternativeEnd;
            lastOp.m_alternative = 0;
            lastOp.m_nextOp = notFound;
        }

        // If every alternative was 'once through' there is nothing to repeat;
        // emit a node to return the failing match result.
        if (currentAlternativeIndex == alternatives.size()) {
            m_ops.append(YarrOp(OpMatchFailed));
            return;
        }

        // Emit the repeated alternatives.
        size_t repeatLoop = m_ops.size();
        m_ops.append(YarrOp(OpBodyAlternativeBegin));
        m_ops.last().m_previousOp = notFound;
        do {
            size_t lastOpIndex = m_ops.size() - 1;
            PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
            ASSERT(!alternative->onceThrough());
            opCompileAlternative(alternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(OpBodyAlternativeNext));

            // As above: re-address nodes only after the append.
            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            lastOp.m_alternative = alternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;

            ++currentAlternativeIndex;
        } while (currentAlternativeIndex < alternatives.size());
        // Repurpose the trailing 'Next' as the End node; unlike the
        // once-through set, link it back to the start of the repeat loop.
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == OpBodyAlternativeNext);
        lastOp.m_op = OpBodyAlternativeEnd;
        lastOp.m_alternative = 0;
        lastOp.m_nextOp = repeatLoop;
    }
2520
    // Emit the function prologue: save the callee-saved registers the
    // generated code will use and, where the platform ABI requires it,
    // move the incoming arguments into their dedicated registers.
    // Must mirror generateReturn() below.
    void generateEnter()
    {
#if CPU(X86_64)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        push(X86Registers::ebx);
        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
        zeroExtend32ToPtr(index, index);
        zeroExtend32ToPtr(length, length);
#if OS(WINDOWS)
        // The Win64 ABI passes only four arguments in registers; fetch
        // 'output' from the caller's stack (6 slots: return address, saved
        // ebp/ebx, and home space -- TODO confirm exact layout).
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
#endif
#elif CPU(X86)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
        push(X86Registers::ebx);
        push(X86Registers::edi);
        push(X86Registers::esi);
        // load output into edi (2 = saved ebp + return address).
    #if COMPILER(MSVC)
        // MSVC's calling convention here leaves all arguments on the stack.
        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
    #else
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
    #endif
#elif CPU(ARM)
        push(ARMRegisters::r4);
        push(ARMRegisters::r5);
        push(ARMRegisters::r6);
#if CPU(ARM_TRADITIONAL)
        push(ARMRegisters::r8); // scratch register
#endif
        // The fourth argument arrives in r3; move it to the 'output' register.
        if (compileMode == IncludeSubpatterns)
            move(ARMRegisters::r3, output);
#elif CPU(SH4)
        push(SH4Registers::r11);
        push(SH4Registers::r13);
#elif CPU(MIPS)
        // Do nothing.
#endif
    }
2568
    // Emit the function epilogue: restore the registers saved in
    // generateEnter() (in reverse order) and return to the caller.
    void generateReturn()
    {
#if CPU(X86_64)
#if OS(WINDOWS)
        // Store the return value in the allocated space pointed by rcx.
        store64(returnRegister, Address(X86Registers::ecx));
        store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
        move(X86Registers::ecx, returnRegister);
#endif
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(X86)
        pop(X86Registers::esi);
        pop(X86Registers::edi);
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(ARM)
#if CPU(ARM_TRADITIONAL)
        pop(ARMRegisters::r8); // scratch register
#endif
        pop(ARMRegisters::r6);
        pop(ARMRegisters::r5);
        pop(ARMRegisters::r4);
#elif CPU(SH4)
        pop(SH4Registers::r13);
        pop(SH4Registers::r11);
#elif CPU(MIPS)
        // Do nothing
#endif
        ret();
    }
2600
2601 public:
    // pattern is held by reference and must outlive this generator.
    // charSize selects whether the generated code reads 8-bit or 16-bit
    // input characters; m_charScale is derived from it for use when
    // indexing the subject string.
    YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
        : m_pattern(pattern)
        , m_charSize(charSize)
        , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
        , m_shouldFallBack(false)
        , m_checked(0)
    {
    }
2610
2611     void compile(VM* vm, YarrCodeBlock& jitObject)
2612     {
2613         generateEnter();
2614
2615         Jump hasInput = checkInput();
2616         move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
2617         move(TrustedImm32(0), returnRegister2);
2618         generateReturn();
2619         hasInput.link(this);
2620
2621         if (compileMode == IncludeSubpatterns) {
2622             for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
2623                 store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
2624         }
2625
2626         if (!m_pattern.m_body->m_hasFixedSize)
2627             setMatchStart(index);
2628
2629         initCallFrame();
2630
2631         // Compile the pattern to the internal 'YarrOp' representation.
2632         opCompileBody(m_pattern.m_body);
2633
2634         // If we encountered anything we can't handle in the JIT code
2635         // (e.g. backreferences) then return early.
2636         if (m_shouldFallBack) {
2637             jitObject.setFallBack(true);
2638             return;
2639         }
2640
2641         generate();
2642         backtrack();
2643
2644         // Link & finalize the code.
2645         LinkBuffer linkBuffer(*vm, this, REGEXP_CODE_ID);
2646         m_backtrackingState.linkDataLabels(linkBuffer);
2647
2648         if (compileMode == MatchOnly) {
2649             if (m_charSize == Char8)
2650                 jitObject.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 8-bit regular expression")));
2651             else
2652                 jitObject.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 16-bit regular expression")));
2653         } else {
2654             if (m_charSize == Char8)
2655                 jitObject.set8BitCode(FINALIZE_CODE(linkBuffer, ("8-bit regular expression")));
2656             else
2657                 jitObject.set16BitCode(FINALIZE_CODE(linkBuffer, ("16-bit regular expression")));
2658         }
2659         jitObject.setFallBack(m_shouldFallBack);
2660     }
2661
private:
    // The parsed pattern being compiled (held by reference; must outlive
    // this generator).
    YarrPattern& m_pattern;

    // Whether the subject string is read as 8-bit or 16-bit characters.
    YarrCharSize m_charSize;

    // Address scale derived from m_charSize (TimesOne for 8-bit,
    // TimesTwo for 16-bit), used when indexing the input string.
    Scale m_charScale;

    // Used to detect regular expression constructs that are not currently
    // supported in the JIT; fall back to the interpreter when this is detected.
    bool m_shouldFallBack;

    // The regular expression expressed as a linear sequence of operations.
    Vector<YarrOp, 128> m_ops;

    // This records the current input offset being applied due to the current
    // set of alternatives we are nested within. E.g. when matching the
    // character 'b' within the regular expression /abc/, we will know that
    // the minimum size for the alternative is 3, checked upon entry to the
    // alternative, and that 'b' is at offset 1 from the start, and as such
    // when matching 'b' we need to apply an offset of -2 to the load.
    //
    // FIXME: This should go away. Rather than tracking this value throughout
    // code generation, we should gather this information up front & store it
    // on the YarrOp structure.
    int m_checked;

    // This class records state whilst generating the backtracking path of code.
    BacktrackingState m_backtrackingState;
2690 };
2691
2692 void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& jitObject, YarrJITCompileMode mode)
2693 {
2694     if (mode == MatchOnly)
2695         YarrGenerator<MatchOnly>(pattern, charSize).compile(vm, jitObject);
2696     else
2697         YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(vm, jitObject);
2698 }
2699
2700 }}
2701
2702 #endif