Macro assembler branch8 & 16 methods vary in treatment of upper bits
[WebKit-https.git] / Source / JavaScriptCore / yarr / YarrJIT.cpp
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27 #include "YarrJIT.h"
28
29 #include "ASCIICType.h"
30 #include "LinkBuffer.h"
31 #include "Yarr.h"
32
33 #if ENABLE(YARR_JIT)
34
35 using namespace WTF;
36
37 namespace JSC { namespace Yarr {
38
39 class YarrGenerator : private MacroAssembler {
40     friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
41
42 #if CPU(ARM)
43     static const RegisterID input = ARMRegisters::r0;
44     static const RegisterID index = ARMRegisters::r1;
45     static const RegisterID length = ARMRegisters::r2;
46     static const RegisterID output = ARMRegisters::r4;
47
48     static const RegisterID regT0 = ARMRegisters::r5;
49     static const RegisterID regT1 = ARMRegisters::r6;
50
51     static const RegisterID returnRegister = ARMRegisters::r0;
52 #elif CPU(MIPS)
53     static const RegisterID input = MIPSRegisters::a0;
54     static const RegisterID index = MIPSRegisters::a1;
55     static const RegisterID length = MIPSRegisters::a2;
56     static const RegisterID output = MIPSRegisters::a3;
57
58     static const RegisterID regT0 = MIPSRegisters::t4;
59     static const RegisterID regT1 = MIPSRegisters::t5;
60
61     static const RegisterID returnRegister = MIPSRegisters::v0;
62 #elif CPU(SH4)
63     static const RegisterID input = SH4Registers::r4;
64     static const RegisterID index = SH4Registers::r5;
65     static const RegisterID length = SH4Registers::r6;
66     static const RegisterID output = SH4Registers::r7;
67
68     static const RegisterID regT0 = SH4Registers::r0;
69     static const RegisterID regT1 = SH4Registers::r1;
70
71     static const RegisterID returnRegister = SH4Registers::r0;
72 #elif CPU(X86)
73     static const RegisterID input = X86Registers::eax;
74     static const RegisterID index = X86Registers::edx;
75     static const RegisterID length = X86Registers::ecx;
76     static const RegisterID output = X86Registers::edi;
77
78     static const RegisterID regT0 = X86Registers::ebx;
79     static const RegisterID regT1 = X86Registers::esi;
80
81     static const RegisterID returnRegister = X86Registers::eax;
82 #elif CPU(X86_64)
83     static const RegisterID input = X86Registers::edi;
84     static const RegisterID index = X86Registers::esi;
85     static const RegisterID length = X86Registers::edx;
86     static const RegisterID output = X86Registers::ecx;
87
88     static const RegisterID regT0 = X86Registers::eax;
89     static const RegisterID regT1 = X86Registers::ebx;
90
91     static const RegisterID returnRegister = X86Registers::eax;
92 #endif
93
94     void optimizeAlternative(PatternAlternative* alternative)
95     {
96         if (!alternative->m_terms.size())
97             return;
98
99         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
100             PatternTerm& term = alternative->m_terms[i];
101             PatternTerm& nextTerm = alternative->m_terms[i + 1];
102
103             if ((term.type == PatternTerm::TypeCharacterClass)
104                 && (term.quantityType == QuantifierFixedCount)
105                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
106                 && (nextTerm.quantityType == QuantifierFixedCount)) {
107                 PatternTerm termCopy = term;
108                 alternative->m_terms[i] = nextTerm;
109                 alternative->m_terms[i + 1] = termCopy;
110             }
111         }
112     }
113
114     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
115     {
116         do {
117             // pick which range we're going to generate
118             int which = count >> 1;
119             char lo = ranges[which].begin;
120             char hi = ranges[which].end;
121
122             // check if there are any ranges or matches below lo.  If not, just jl to failure -
123             // if there is anything else to check, check that first, if it falls through jmp to failure.
124             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
125                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
126
127                 // generate code for all ranges before this one
128                 if (which)
129                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
130
131                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
132                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
133                     ++*matchIndex;
134                 }
135                 failures.append(jump());
136
137                 loOrAbove.link(this);
138             } else if (which) {
139                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
140
141                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
142                 failures.append(jump());
143
144                 loOrAbove.link(this);
145             } else
146                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
147
148             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
149                 ++*matchIndex;
150
151             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
152             // fall through to here, the value is above hi.
153
154             // shuffle along & loop around if there are any more matches to handle.
155             unsigned next = which + 1;
156             ranges += next;
157             count -= next;
158         } while (count);
159     }
160
161     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
162     {
163         if (charClass->m_table) {
164             ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
165             matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
166             return;
167         }
168         Jump unicodeFail;
169         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
170             Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));
171
172             if (charClass->m_matchesUnicode.size()) {
173                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
174                     UChar ch = charClass->m_matchesUnicode[i];
175                     matchDest.append(branch32(Equal, character, Imm32(ch)));
176                 }
177             }
178
179             if (charClass->m_rangesUnicode.size()) {
180                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
181                     UChar lo = charClass->m_rangesUnicode[i].begin;
182                     UChar hi = charClass->m_rangesUnicode[i].end;
183
184                     Jump below = branch32(LessThan, character, Imm32(lo));
185                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
186                     below.link(this);
187                 }
188             }
189
190             unicodeFail = jump();
191             isAscii.link(this);
192         }
193
194         if (charClass->m_ranges.size()) {
195             unsigned matchIndex = 0;
196             JumpList failures;
197             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
198             while (matchIndex < charClass->m_matches.size())
199                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
200
201             failures.link(this);
202         } else if (charClass->m_matches.size()) {
203             // optimization: gather 'a','A' etc back together, can mask & test once.
204             Vector<char> matchesAZaz;
205
206             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
207                 char ch = charClass->m_matches[i];
208                 if (m_pattern.m_ignoreCase) {
209                     if (isASCIILower(ch)) {
210                         matchesAZaz.append(ch);
211                         continue;
212                     }
213                     if (isASCIIUpper(ch))
214                         continue;
215                 }
216                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
217             }
218
219             if (unsigned countAZaz = matchesAZaz.size()) {
220                 or32(TrustedImm32(32), character);
221                 for (unsigned i = 0; i < countAZaz; ++i)
222                     matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
223             }
224         }
225
226         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
227             unicodeFail.link(this);
228     }
229
230     // Jumps if input not available; will have (incorrectly) incremented already!
231     Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
232     {
233         if (countToCheck)
234             add32(Imm32(countToCheck), index);
235         return branch32(Above, index, length);
236     }
237
238     Jump jumpIfAvailableInput(unsigned countToCheck)
239     {
240         add32(Imm32(countToCheck), index);
241         return branch32(BelowOrEqual, index, length);
242     }
243
244     Jump checkInput()
245     {
246         return branch32(BelowOrEqual, index, length);
247     }
248
249     Jump atEndOfInput()
250     {
251         return branch32(Equal, index, length);
252     }
253
254     Jump notAtEndOfInput()
255     {
256         return branch32(NotEqual, index, length);
257     }
258
259     Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
260     {
261         readCharacter(inputPosition, character);
262
263         // For case-insesitive compares, non-ascii characters that have different
264         // upper & lower case representations are converted to a character class.
265         ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
266         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
267             or32(TrustedImm32(32), character);
268             ch = Unicode::toLower(ch);
269         }
270
271         return branch32(NotEqual, character, Imm32(ch));
272     }
273
274     void readCharacter(int inputPosition, RegisterID reg)
275     {
276         if (m_charSize == Char8)
277             load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
278         else
279             load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
280     }
281
282     void storeToFrame(RegisterID reg, unsigned frameLocation)
283     {
284         poke(reg, frameLocation);
285     }
286
287     void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
288     {
289         poke(imm, frameLocation);
290     }
291
292     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
293     {
294         return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
295     }
296
297     void loadFromFrame(unsigned frameLocation, RegisterID reg)
298     {
299         peek(reg, frameLocation);
300     }
301
302     void loadFromFrameAndJump(unsigned frameLocation)
303     {
304         jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
305     }
306
307     enum YarrOpCode {
308         // These nodes wrap body alternatives - those in the main disjunction,
309         // rather than subpatterns or assertions. These are chained together in
310         // a doubly linked list, with a 'begin' node for the first alternative,
311         // a 'next' node for each subsequent alternative, and an 'end' node at
312         // the end. In the case of repeating alternatives, the 'end' node also
313         // has a reference back to 'begin'.
314         OpBodyAlternativeBegin,
315         OpBodyAlternativeNext,
316         OpBodyAlternativeEnd,
317         // Similar to the body alternatives, but used for subpatterns with two
318         // or more alternatives.
319         OpNestedAlternativeBegin,
320         OpNestedAlternativeNext,
321         OpNestedAlternativeEnd,
322         // Used for alternatives in subpatterns where there is only a single
323         // alternative (backtrackingis easier in these cases), or for alternatives
324         // which never need to be backtracked (those in parenthetical assertions,
325         // terminal subpatterns).
326         OpSimpleNestedAlternativeBegin,
327         OpSimpleNestedAlternativeNext,
328         OpSimpleNestedAlternativeEnd,
329         // Used to wrap 'Once' subpattern matches (quantityCount == 1).
330         OpParenthesesSubpatternOnceBegin,
331         OpParenthesesSubpatternOnceEnd,
332         // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
333         OpParenthesesSubpatternTerminalBegin,
334         OpParenthesesSubpatternTerminalEnd,
335         // Used to wrap parenthetical assertions.
336         OpParentheticalAssertionBegin,
337         OpParentheticalAssertionEnd,
338         // Wraps all simple terms (pattern characters, character classes).
339         OpTerm,
340         // Where an expression contains only 'once through' body alternatives
341         // and no repeating ones, this op is used to return match failure.
342         OpMatchFailed
343     };
344
345     // This structure is used to hold the compiled opcode information,
346     // including reference back to the original PatternTerm/PatternAlternatives,
347     // and JIT compilation data structures.
348     struct YarrOp {
349         explicit YarrOp(PatternTerm* term)
350             : m_op(OpTerm)
351             , m_term(term)
352             , m_isDeadCode(false)
353         {
354         }
355
356         explicit YarrOp(YarrOpCode op)
357             : m_op(op)
358             , m_isDeadCode(false)
359         {
360         }
361
362         // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
363         YarrOpCode m_op;
364         PatternTerm* m_term;
365
366         // For alternatives, this holds the PatternAlternative and doubly linked
367         // references to this alternative's siblings. In the case of the
368         // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
369         // m_nextOp will reference the OpBodyAlternativeBegin node of the first
370         // repeating alternative.
371         PatternAlternative* m_alternative;
372         size_t m_previousOp;
373         size_t m_nextOp;
374
375         // Used to record a set of Jumps out of the generated code, typically
376         // used for jumps out to backtracking code, and a single reentry back
377         // into the code for a node (likely where a backtrack will trigger
378         // rematching).
379         Label m_reentry;
380         JumpList m_jumps;
381
382         // This flag is used to null out the second pattern character, when
383         // two are fused to match a pair together.
384         bool m_isDeadCode;
385
386         // Currently used in the case of some of the more complex management of
387         // 'm_checked', to cache the offset used in this alternative, to avoid
388         // recalculating it.
389         int m_checkAdjust;
390
391         // Used by OpNestedAlternativeNext/End to hold the pointer to the
392         // value that will be pushed into the pattern's frame to return to,
393         // upon backtracking back into the disjunction.
394         DataLabelPtr m_returnAddress;
395     };
396
397     // BacktrackingState
398     // This class encapsulates information about the state of code generation
399     // whilst generating the code for backtracking, when a term fails to match.
400     // Upon entry to code generation of the backtracking code for a given node,
401     // the Backtracking state will hold references to all control flow sources
402     // that are outputs in need of further backtracking from the prior node
403     // generated (which is the subsequent operation in the regular expression,
404     // and in the m_ops Vector, since we generated backtracking backwards).
405     // These references to control flow take the form of:
406     //  - A jump list of jumps, to be linked to code that will backtrack them
407     //    further.
408     //  - A set of DataLabelPtr values, to be populated with values to be
409     //    treated effectively as return addresses backtracking into complex
410     //    subpatterns.
411     //  - A flag indicating that the current sequence of generated code up to
412     //    this point requires backtracking.
413     class BacktrackingState {
414     public:
415         BacktrackingState()
416             : m_pendingFallthrough(false)
417         {
418         }
419
420         // Add a jump or jumps, a return address, or set the flag indicating
421         // that the current 'fallthrough' control flow requires backtracking.
422         void append(const Jump& jump)
423         {
424             m_laterFailures.append(jump);
425         }
426         void append(JumpList& jumpList)
427         {
428             m_laterFailures.append(jumpList);
429         }
430         void append(const DataLabelPtr& returnAddress)
431         {
432             m_pendingReturns.append(returnAddress);
433         }
434         void fallthrough()
435         {
436             ASSERT(!m_pendingFallthrough);
437             m_pendingFallthrough = true;
438         }
439
440         // These methods clear the backtracking state, either linking to the
441         // current location, a provided label, or copying the backtracking out
442         // to a JumpList. All actions may require code generation to take place,
443         // and as such are passed a pointer to the assembler.
444         void link(MacroAssembler* assembler)
445         {
446             if (m_pendingReturns.size()) {
447                 Label here(assembler);
448                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
449                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
450                 m_pendingReturns.clear();
451             }
452             m_laterFailures.link(assembler);
453             m_laterFailures.clear();
454             m_pendingFallthrough = false;
455         }
456         void linkTo(Label label, MacroAssembler* assembler)
457         {
458             if (m_pendingReturns.size()) {
459                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
460                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
461                 m_pendingReturns.clear();
462             }
463             if (m_pendingFallthrough)
464                 assembler->jump(label);
465             m_laterFailures.linkTo(label, assembler);
466             m_laterFailures.clear();
467             m_pendingFallthrough = false;
468         }
469         void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
470         {
471             if (m_pendingReturns.size()) {
472                 Label here(assembler);
473                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
474                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
475                 m_pendingReturns.clear();
476                 m_pendingFallthrough = true;
477             }
478             if (m_pendingFallthrough)
479                 jumpList.append(assembler->jump());
480             jumpList.append(m_laterFailures);
481             m_laterFailures.clear();
482             m_pendingFallthrough = false;
483         }
484
485         bool isEmpty()
486         {
487             return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
488         }
489
490         // Called at the end of code generation to link all return addresses.
491         void linkDataLabels(LinkBuffer& linkBuffer)
492         {
493             ASSERT(isEmpty());
494             for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
495                 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
496         }
497
498     private:
499         struct ReturnAddressRecord {
500             ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
501                 : m_dataLabel(dataLabel)
502                 , m_backtrackLocation(backtrackLocation)
503             {
504             }
505
506             DataLabelPtr m_dataLabel;
507             Label m_backtrackLocation;
508         };
509
510         JumpList m_laterFailures;
511         bool m_pendingFallthrough;
512         Vector<DataLabelPtr, 4> m_pendingReturns;
513         Vector<ReturnAddressRecord, 4> m_backtrackRecords;
514     };
515
516     // Generation methods:
517     // ===================
518
519     // This method provides a default implementation of backtracking common
520     // to many terms; terms commonly jump out of the forwards  matching path
521     // on any failed conditions, and add these jumps to the m_jumps list. If
522     // no special handling is required we can often just backtrack to m_jumps.
523     void backtrackTermDefault(size_t opIndex)
524     {
525         YarrOp& op = m_ops[opIndex];
526         m_backtrackingState.append(op.m_jumps);
527     }
528
529     void generateAssertionBOL(size_t opIndex)
530     {
531         YarrOp& op = m_ops[opIndex];
532         PatternTerm* term = op.m_term;
533
534         if (m_pattern.m_multiline) {
535             const RegisterID character = regT0;
536
537             JumpList matchDest;
538             if (!term->inputPosition)
539                 matchDest.append(branch32(Equal, index, Imm32(m_checked)));
540
541             readCharacter((term->inputPosition - m_checked) - 1, character);
542             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
543             op.m_jumps.append(jump());
544
545             matchDest.link(this);
546         } else {
547             // Erk, really should poison out these alternatives early. :-/
548             if (term->inputPosition)
549                 op.m_jumps.append(jump());
550             else
551                 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
552         }
553     }
554     void backtrackAssertionBOL(size_t opIndex)
555     {
556         backtrackTermDefault(opIndex);
557     }
558
559     void generateAssertionEOL(size_t opIndex)
560     {
561         YarrOp& op = m_ops[opIndex];
562         PatternTerm* term = op.m_term;
563
564         if (m_pattern.m_multiline) {
565             const RegisterID character = regT0;
566
567             JumpList matchDest;
568             if (term->inputPosition == m_checked)
569                 matchDest.append(atEndOfInput());
570
571             readCharacter(term->inputPosition - m_checked, character);
572             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
573             op.m_jumps.append(jump());
574
575             matchDest.link(this);
576         } else {
577             if (term->inputPosition == m_checked)
578                 op.m_jumps.append(notAtEndOfInput());
579             // Erk, really should poison out these alternatives early. :-/
580             else
581                 op.m_jumps.append(jump());
582         }
583     }
584     void backtrackAssertionEOL(size_t opIndex)
585     {
586         backtrackTermDefault(opIndex);
587     }
588
589     // Also falls though on nextIsNotWordChar.
590     void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
591     {
592         YarrOp& op = m_ops[opIndex];
593         PatternTerm* term = op.m_term;
594
595         const RegisterID character = regT0;
596
597         if (term->inputPosition == m_checked)
598             nextIsNotWordChar.append(atEndOfInput());
599
600         readCharacter((term->inputPosition - m_checked), character);
601         matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
602     }
603
604     void generateAssertionWordBoundary(size_t opIndex)
605     {
606         YarrOp& op = m_ops[opIndex];
607         PatternTerm* term = op.m_term;
608
609         const RegisterID character = regT0;
610
611         Jump atBegin;
612         JumpList matchDest;
613         if (!term->inputPosition)
614             atBegin = branch32(Equal, index, Imm32(m_checked));
615         readCharacter((term->inputPosition - m_checked) - 1, character);
616         matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
617         if (!term->inputPosition)
618             atBegin.link(this);
619
620         // We fall through to here if the last character was not a wordchar.
621         JumpList nonWordCharThenWordChar;
622         JumpList nonWordCharThenNonWordChar;
623         if (term->invert()) {
624             matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
625             nonWordCharThenWordChar.append(jump());
626         } else {
627             matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
628             nonWordCharThenNonWordChar.append(jump());
629         }
630         op.m_jumps.append(nonWordCharThenNonWordChar);
631
632         // We jump here if the last character was a wordchar.
633         matchDest.link(this);
634         JumpList wordCharThenWordChar;
635         JumpList wordCharThenNonWordChar;
636         if (term->invert()) {
637             matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
638             wordCharThenWordChar.append(jump());
639         } else {
640             matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
641             // This can fall-though!
642         }
643
644         op.m_jumps.append(wordCharThenWordChar);
645
646         nonWordCharThenWordChar.link(this);
647         wordCharThenNonWordChar.link(this);
648     }
649     void backtrackAssertionWordBoundary(size_t opIndex)
650     {
651         backtrackTermDefault(opIndex);
652     }
653
654     void generatePatternCharacterOnce(size_t opIndex)
655     {
656         YarrOp& op = m_ops[opIndex];
657
658         // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
659         // node, so there must always be at least one more node.
660         ASSERT(opIndex + 1 < m_ops.size());
661         YarrOp& nextOp = m_ops[opIndex + 1];
662
663         if (op.m_isDeadCode)
664             return;
665
666         PatternTerm* term = op.m_term;
667         UChar ch = term->patternCharacter;
668
669         if ((ch > 0xff) && (m_charSize == Char8)) {
670             // Have a 16 bit pattern character and an 8 bit string - short circuit
671             op.m_jumps.append(jump());
672             return;
673         }
674
675         const RegisterID character = regT0;
676
677         if (nextOp.m_op == OpTerm) {
678             PatternTerm* nextTerm = nextOp.m_term;
679             if (nextTerm->type == PatternTerm::TypePatternCharacter
680                 && nextTerm->quantityType == QuantifierFixedCount
681                 && nextTerm->quantityCount == 1
682                 && nextTerm->inputPosition == (term->inputPosition + 1)) {
683
684                 UChar ch2 = nextTerm->patternCharacter;
685
686                 int shiftAmount = m_charSize == Char8 ? 8 : 16;
687                 int mask = 0;
688                 int chPair = ch | (ch2 << shiftAmount);
689
690                 // For case-insesitive compares, non-ascii characters that have different
691                 // upper & lower case representations are converted to a character class.
692                 ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
693                 if (m_pattern.m_ignoreCase) {
694                     if (isASCIIAlpha(ch))
695                         mask |= 32;
696                     if (isASCIIAlpha(ch2))
697                         mask |= 32 << shiftAmount;
698                 }
699
700                 if (m_charSize == Char8) {
701                     BaseIndex address(input, index, TimesOne, (term->inputPosition - m_checked) * sizeof(char));
702                     load16(address, character);
703                 } else {
704                     BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
705                     load32WithUnalignedHalfWords(address, character);
706                 }
707                 if (mask)
708                     or32(Imm32(mask), character);
709                 op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
710
711                 nextOp.m_isDeadCode = true;
712                 return;
713             }
714         }
715
716         op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
717     }
718     void backtrackPatternCharacterOnce(size_t opIndex)
719     {
720         backtrackTermDefault(opIndex);
721     }
722
723     void generatePatternCharacterFixed(size_t opIndex)
724     {
725         YarrOp& op = m_ops[opIndex];
726         PatternTerm* term = op.m_term;
727         UChar ch = term->patternCharacter;
728
729         const RegisterID character = regT0;
730         const RegisterID countRegister = regT1;
731
732         move(index, countRegister);
733         sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
734
735         Label loop(this);
736         BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());
737
738         if (m_charSize == Char8)
739             load8(address, character);
740         else
741             load16(address, character);
742
743         // For case-insesitive compares, non-ascii characters that have different
744         // upper & lower case representations are converted to a character class.
745         ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
746         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
747             or32(TrustedImm32(32), character);
748             ch = Unicode::toLower(ch);
749         }
750
751         op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
752         add32(TrustedImm32(1), countRegister);
753         branch32(NotEqual, countRegister, index).linkTo(loop, this);
754     }
755     void backtrackPatternCharacterFixed(size_t opIndex)
756     {
757         backtrackTermDefault(opIndex);
758     }
759
760     void generatePatternCharacterGreedy(size_t opIndex)
761     {
762         YarrOp& op = m_ops[opIndex];
763         PatternTerm* term = op.m_term;
764         UChar ch = term->patternCharacter;
765
766         const RegisterID character = regT0;
767         const RegisterID countRegister = regT1;
768
769         move(TrustedImm32(0), countRegister);
770
771         if ((ch > 0xff) && (m_charSize == Char8)) {
772             // Have a 16 bit pattern character and an 8 bit string - short circuit
773             op.m_jumps.append(jump());
774         } else {
775             JumpList failures;
776             Label loop(this);
777             failures.append(atEndOfInput());
778             failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
779
780             add32(TrustedImm32(1), countRegister);
781             add32(TrustedImm32(1), index);
782             if (term->quantityCount == quantifyInfinite)
783                 jump(loop);
784             else
785                 branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
786
787             failures.link(this);
788         }
789         op.m_reentry = label();
790
791         storeToFrame(countRegister, term->frameLocation);
792
793     }
794     void backtrackPatternCharacterGreedy(size_t opIndex)
795     {
796         YarrOp& op = m_ops[opIndex];
797         PatternTerm* term = op.m_term;
798
799         const RegisterID countRegister = regT1;
800
801         m_backtrackingState.link(this);
802
803         loadFromFrame(term->frameLocation, countRegister);
804         m_backtrackingState.append(branchTest32(Zero, countRegister));
805         sub32(TrustedImm32(1), countRegister);
806         sub32(TrustedImm32(1), index);
807         jump(op.m_reentry);
808     }
809
810     void generatePatternCharacterNonGreedy(size_t opIndex)
811     {
812         YarrOp& op = m_ops[opIndex];
813         PatternTerm* term = op.m_term;
814
815         const RegisterID countRegister = regT1;
816
817         move(TrustedImm32(0), countRegister);
818         op.m_reentry = label();
819         storeToFrame(countRegister, term->frameLocation);
820     }
821     void backtrackPatternCharacterNonGreedy(size_t opIndex)
822     {
823         YarrOp& op = m_ops[opIndex];
824         PatternTerm* term = op.m_term;
825         UChar ch = term->patternCharacter;
826
827         const RegisterID character = regT0;
828         const RegisterID countRegister = regT1;
829
830         JumpList nonGreedyFailures;
831
832         m_backtrackingState.link(this);
833
834         loadFromFrame(term->frameLocation, countRegister);
835
836         if ((ch > 0xff) && (m_charSize == Char8)) {
837             // Have a 16 bit pattern character and an 8 bit string - short circuit
838             nonGreedyFailures.append(jump());
839         } else {
840             nonGreedyFailures.append(atEndOfInput());
841             if (term->quantityCount != quantifyInfinite)
842                 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
843             nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
844
845             add32(TrustedImm32(1), countRegister);
846             add32(TrustedImm32(1), index);
847
848             jump(op.m_reentry);
849         }
850         nonGreedyFailures.link(this);
851
852         sub32(countRegister, index);
853         m_backtrackingState.fallthrough();
854     }
855
856     void generateCharacterClassOnce(size_t opIndex)
857     {
858         YarrOp& op = m_ops[opIndex];
859         PatternTerm* term = op.m_term;
860
861         const RegisterID character = regT0;
862
863         JumpList matchDest;
864         readCharacter(term->inputPosition - m_checked, character);
865         matchCharacterClass(character, matchDest, term->characterClass);
866
867         if (term->invert())
868             op.m_jumps.append(matchDest);
869         else {
870             op.m_jumps.append(jump());
871             matchDest.link(this);
872         }
873     }
874     void backtrackCharacterClassOnce(size_t opIndex)
875     {
876         backtrackTermDefault(opIndex);
877     }
878
879     void generateCharacterClassFixed(size_t opIndex)
880     {
881         YarrOp& op = m_ops[opIndex];
882         PatternTerm* term = op.m_term;
883
884         const RegisterID character = regT0;
885         const RegisterID countRegister = regT1;
886
887         move(index, countRegister);
888         sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
889
890         Label loop(this);
891         JumpList matchDest;
892         if (m_charSize == Char8)
893             load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
894         else
895             load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
896         matchCharacterClass(character, matchDest, term->characterClass);
897
898         if (term->invert())
899             op.m_jumps.append(matchDest);
900         else {
901             op.m_jumps.append(jump());
902             matchDest.link(this);
903         }
904
905         add32(TrustedImm32(1), countRegister);
906         branch32(NotEqual, countRegister, index).linkTo(loop, this);
907     }
908     void backtrackCharacterClassFixed(size_t opIndex)
909     {
910         backtrackTermDefault(opIndex);
911     }
912
913     void generateCharacterClassGreedy(size_t opIndex)
914     {
915         YarrOp& op = m_ops[opIndex];
916         PatternTerm* term = op.m_term;
917
918         const RegisterID character = regT0;
919         const RegisterID countRegister = regT1;
920
921         move(TrustedImm32(0), countRegister);
922
923         JumpList failures;
924         Label loop(this);
925         failures.append(atEndOfInput());
926
927         if (term->invert()) {
928             readCharacter(term->inputPosition - m_checked, character);
929             matchCharacterClass(character, failures, term->characterClass);
930         } else {
931             JumpList matchDest;
932             readCharacter(term->inputPosition - m_checked, character);
933             matchCharacterClass(character, matchDest, term->characterClass);
934             failures.append(jump());
935             matchDest.link(this);
936         }
937
938         add32(TrustedImm32(1), countRegister);
939         add32(TrustedImm32(1), index);
940         if (term->quantityCount != quantifyInfinite) {
941             branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
942             failures.append(jump());
943         } else
944             jump(loop);
945
946         failures.link(this);
947         op.m_reentry = label();
948
949         storeToFrame(countRegister, term->frameLocation);
950     }
951     void backtrackCharacterClassGreedy(size_t opIndex)
952     {
953         YarrOp& op = m_ops[opIndex];
954         PatternTerm* term = op.m_term;
955
956         const RegisterID countRegister = regT1;
957
958         m_backtrackingState.link(this);
959
960         loadFromFrame(term->frameLocation, countRegister);
961         m_backtrackingState.append(branchTest32(Zero, countRegister));
962         sub32(TrustedImm32(1), countRegister);
963         sub32(TrustedImm32(1), index);
964         jump(op.m_reentry);
965     }
966
967     void generateCharacterClassNonGreedy(size_t opIndex)
968     {
969         YarrOp& op = m_ops[opIndex];
970         PatternTerm* term = op.m_term;
971
972         const RegisterID countRegister = regT1;
973
974         move(TrustedImm32(0), countRegister);
975         op.m_reentry = label();
976         storeToFrame(countRegister, term->frameLocation);
977     }
978     void backtrackCharacterClassNonGreedy(size_t opIndex)
979     {
980         YarrOp& op = m_ops[opIndex];
981         PatternTerm* term = op.m_term;
982
983         const RegisterID character = regT0;
984         const RegisterID countRegister = regT1;
985
986         JumpList nonGreedyFailures;
987
988         m_backtrackingState.link(this);
989
990         Label backtrackBegin(this);
991         loadFromFrame(term->frameLocation, countRegister);
992
993         nonGreedyFailures.append(atEndOfInput());
994         nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
995
996         JumpList matchDest;
997         readCharacter(term->inputPosition - m_checked, character);
998         matchCharacterClass(character, matchDest, term->characterClass);
999
1000         if (term->invert())
1001             nonGreedyFailures.append(matchDest);
1002         else {
1003             nonGreedyFailures.append(jump());
1004             matchDest.link(this);
1005         }
1006
1007         add32(TrustedImm32(1), countRegister);
1008         add32(TrustedImm32(1), index);
1009
1010         jump(op.m_reentry);
1011
1012         nonGreedyFailures.link(this);
1013         sub32(countRegister, index);
1014         m_backtrackingState.fallthrough();
1015     }
1016
1017     void generateDotStarEnclosure(size_t opIndex)
1018     {
1019         YarrOp& op = m_ops[opIndex];
1020         PatternTerm* term = op.m_term;
1021
1022         const RegisterID character = regT0;
1023         const RegisterID matchPos = regT1;
1024
1025         JumpList foundBeginningNewLine;
1026         JumpList saveStartIndex;
1027         JumpList foundEndingNewLine;
1028
1029         if (m_pattern.m_body->m_hasFixedSize) {
1030             move(index, matchPos);
1031             sub32(Imm32(m_checked), matchPos);
1032         } else
1033             load32(Address(output), matchPos);
1034
1035         saveStartIndex.append(branchTest32(Zero, matchPos));
1036         Label findBOLLoop(this);
1037         sub32(TrustedImm32(1), matchPos);
1038         if (m_charSize == Char8)
1039             load8(BaseIndex(input, matchPos, TimesOne, 0), character);
1040         else
1041             load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
1042         matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
1043         branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
1044         saveStartIndex.append(jump());
1045
1046         foundBeginningNewLine.link(this);
1047         add32(TrustedImm32(1), matchPos); // Advance past newline
1048         saveStartIndex.link(this);
1049
1050         if (!m_pattern.m_multiline && term->anchors.bolAnchor)
1051             op.m_jumps.append(branchTest32(NonZero, matchPos));
1052
1053         store32(matchPos, Address(output));
1054
1055         move(index, matchPos);
1056
1057         Label findEOLLoop(this);        
1058         foundEndingNewLine.append(branch32(Equal, matchPos, length));
1059         if (m_charSize == Char8)
1060             load8(BaseIndex(input, matchPos, TimesOne, 0), character);
1061         else
1062             load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
1063         matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
1064         add32(TrustedImm32(1), matchPos);
1065         jump(findEOLLoop);
1066
1067         foundEndingNewLine.link(this);
1068
1069         if (!m_pattern.m_multiline && term->anchors.eolAnchor)
1070             op.m_jumps.append(branch32(NotEqual, matchPos, length));
1071
1072         move(matchPos, index);
1073     }
1074
1075     void backtrackDotStarEnclosure(size_t opIndex)
1076     {
1077         backtrackTermDefault(opIndex);
1078     }
1079     
1080     // Code generation/backtracking for simple terms
1081     // (pattern characters, character classes, and assertions).
1082     // These methods farm out work to the set of functions above.
1083     void generateTerm(size_t opIndex)
1084     {
1085         YarrOp& op = m_ops[opIndex];
1086         PatternTerm* term = op.m_term;
1087
1088         switch (term->type) {
1089         case PatternTerm::TypePatternCharacter:
1090             switch (term->quantityType) {
1091             case QuantifierFixedCount:
1092                 if (term->quantityCount == 1)
1093                     generatePatternCharacterOnce(opIndex);
1094                 else
1095                     generatePatternCharacterFixed(opIndex);
1096                 break;
1097             case QuantifierGreedy:
1098                 generatePatternCharacterGreedy(opIndex);
1099                 break;
1100             case QuantifierNonGreedy:
1101                 generatePatternCharacterNonGreedy(opIndex);
1102                 break;
1103             }
1104             break;
1105
1106         case PatternTerm::TypeCharacterClass:
1107             switch (term->quantityType) {
1108             case QuantifierFixedCount:
1109                 if (term->quantityCount == 1)
1110                     generateCharacterClassOnce(opIndex);
1111                 else
1112                     generateCharacterClassFixed(opIndex);
1113                 break;
1114             case QuantifierGreedy:
1115                 generateCharacterClassGreedy(opIndex);
1116                 break;
1117             case QuantifierNonGreedy:
1118                 generateCharacterClassNonGreedy(opIndex);
1119                 break;
1120             }
1121             break;
1122
1123         case PatternTerm::TypeAssertionBOL:
1124             generateAssertionBOL(opIndex);
1125             break;
1126
1127         case PatternTerm::TypeAssertionEOL:
1128             generateAssertionEOL(opIndex);
1129             break;
1130
1131         case PatternTerm::TypeAssertionWordBoundary:
1132             generateAssertionWordBoundary(opIndex);
1133             break;
1134
1135         case PatternTerm::TypeForwardReference:
1136             break;
1137
1138         case PatternTerm::TypeParenthesesSubpattern:
1139         case PatternTerm::TypeParentheticalAssertion:
1140             ASSERT_NOT_REACHED();
1141         case PatternTerm::TypeBackReference:
1142             m_shouldFallBack = true;
1143             break;
1144         case PatternTerm::TypeDotStarEnclosure:
1145             generateDotStarEnclosure(opIndex);
1146             break;
1147         }
1148     }
1149     void backtrackTerm(size_t opIndex)
1150     {
1151         YarrOp& op = m_ops[opIndex];
1152         PatternTerm* term = op.m_term;
1153
1154         switch (term->type) {
1155         case PatternTerm::TypePatternCharacter:
1156             switch (term->quantityType) {
1157             case QuantifierFixedCount:
1158                 if (term->quantityCount == 1)
1159                     backtrackPatternCharacterOnce(opIndex);
1160                 else
1161                     backtrackPatternCharacterFixed(opIndex);
1162                 break;
1163             case QuantifierGreedy:
1164                 backtrackPatternCharacterGreedy(opIndex);
1165                 break;
1166             case QuantifierNonGreedy:
1167                 backtrackPatternCharacterNonGreedy(opIndex);
1168                 break;
1169             }
1170             break;
1171
1172         case PatternTerm::TypeCharacterClass:
1173             switch (term->quantityType) {
1174             case QuantifierFixedCount:
1175                 if (term->quantityCount == 1)
1176                     backtrackCharacterClassOnce(opIndex);
1177                 else
1178                     backtrackCharacterClassFixed(opIndex);
1179                 break;
1180             case QuantifierGreedy:
1181                 backtrackCharacterClassGreedy(opIndex);
1182                 break;
1183             case QuantifierNonGreedy:
1184                 backtrackCharacterClassNonGreedy(opIndex);
1185                 break;
1186             }
1187             break;
1188
1189         case PatternTerm::TypeAssertionBOL:
1190             backtrackAssertionBOL(opIndex);
1191             break;
1192
1193         case PatternTerm::TypeAssertionEOL:
1194             backtrackAssertionEOL(opIndex);
1195             break;
1196
1197         case PatternTerm::TypeAssertionWordBoundary:
1198             backtrackAssertionWordBoundary(opIndex);
1199             break;
1200
1201         case PatternTerm::TypeForwardReference:
1202             break;
1203
1204         case PatternTerm::TypeParenthesesSubpattern:
1205         case PatternTerm::TypeParentheticalAssertion:
1206             ASSERT_NOT_REACHED();
1207
1208         case PatternTerm::TypeDotStarEnclosure:
1209             backtrackDotStarEnclosure(opIndex);
1210             break;
1211
1212         case PatternTerm::TypeBackReference:
1213             m_shouldFallBack = true;
1214             break;
1215         }
1216     }
1217
1218     void generate()
1219     {
1220         // Forwards generate the matching code.
1221         ASSERT(m_ops.size());
1222         size_t opIndex = 0;
1223
1224         do {
1225             YarrOp& op = m_ops[opIndex];
1226             switch (op.m_op) {
1227
1228             case OpTerm:
1229                 generateTerm(opIndex);
1230                 break;
1231
1232             // OpBodyAlternativeBegin/Next/End
1233             //
1234             // These nodes wrap the set of alternatives in the body of the regular expression.
1235             // There may be either one or two chains of OpBodyAlternative nodes, one representing
1236             // the 'once through' sequence of alternatives (if any exist), and one representing
1237             // the repeating alternatives (again, if any exist).
1238             //
1239             // Upon normal entry to the Begin alternative, we will check that input is available.
1240             // Reentry to the Begin alternative will take place after the check has taken place,
1241             // and will assume that the input position has already been progressed as appropriate.
1242             //
1243             // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1244             // successfully completed a match - return a success state from JIT code.
1245             //
1246             // Next alternatives allow for reentry optimized to suit backtracking from its
1247             // preceding alternative. It expects the input position to still be set to a position
1248             // appropriate to its predecessor, and it will only perform an input check if the
1249             // predecessor had a minimum size less than its own.
1250             //
1251             // In the case 'once through' expressions, the End node will also have a reentry
1252             // point to jump to when the last alternative fails. Again, this expects the input
1253             // position to still reflect that expected by the prior alternative.
1254             case OpBodyAlternativeBegin: {
1255                 PatternAlternative* alternative = op.m_alternative;
1256
1257                 // Upon entry at the head of the set of alternatives, check if input is available
1258                 // to run the first alternative. (This progresses the input position).
1259                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
1260                 // We will reenter after the check, and assume the input position to have been
1261                 // set as appropriate to this alternative.
1262                 op.m_reentry = label();
1263
1264                 m_checked += alternative->m_minimumSize;
1265                 break;
1266             }
1267             case OpBodyAlternativeNext:
1268             case OpBodyAlternativeEnd: {
1269                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1270                 PatternAlternative* alternative = op.m_alternative;
1271
1272                 // If we get here, the prior alternative matched - return success.
1273                 
1274                 // Adjust the stack pointer to remove the pattern's frame.
1275                 if (m_pattern.m_body->m_callFrameSize)
1276                     addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1277
1278                 // Load appropriate values into the return register and the first output
1279                 // slot, and return. In the case of pattern with a fixed size, we will
1280                 // not have yet set the value in the first 
1281                 ASSERT(index != returnRegister);
1282                 if (m_pattern.m_body->m_hasFixedSize) {
1283                     move(index, returnRegister);
1284                     if (priorAlternative->m_minimumSize)
1285                         sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
1286                     store32(returnRegister, output);
1287                 } else
1288                     load32(Address(output), returnRegister);
1289                 store32(index, Address(output, 4));
1290                 generateReturn();
1291
1292                 // This is the divide between the tail of the prior alternative, above, and
1293                 // the head of the subsequent alternative, below.
1294
1295                 if (op.m_op == OpBodyAlternativeNext) {
1296                     // This is the reentry point for the Next alternative. We expect any code
1297                     // that jumps here to do so with the input position matching that of the
1298                     // PRIOR alteranative, and we will only check input availability if we
1299                     // need to progress it forwards.
1300                     op.m_reentry = label();
1301                     if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
1302                         add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
1303                         op.m_jumps.append(jumpIfNoAvailableInput());
1304                     } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
1305                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
1306                 } else if (op.m_nextOp == notFound) {
1307                     // This is the reentry point for the End of 'once through' alternatives,
1308                     // jumped to when the las alternative fails to match.
1309                     op.m_reentry = label();
1310                     sub32(Imm32(priorAlternative->m_minimumSize), index);
1311                 }
1312
1313                 if (op.m_op == OpBodyAlternativeNext)
1314                     m_checked += alternative->m_minimumSize;
1315                 m_checked -= priorAlternative->m_minimumSize;
1316                 break;
1317             }
1318
1319             // OpSimpleNestedAlternativeBegin/Next/End
1320             // OpNestedAlternativeBegin/Next/End
1321             //
1322             // These nodes are used to handle sets of alternatives that are nested within
1323             // subpatterns and parenthetical assertions. The 'simple' forms are used where
1324             // we do not need to be able to backtrack back into any alternative other than
1325             // the last, the normal forms allow backtracking into any alternative.
1326             //
1327             // Each Begin/Next node is responsible for planting an input check to ensure
1328             // sufficient input is available on entry. Next nodes additionally need to
1329             // jump to the end - Next nodes use the End node's m_jumps list to hold this
1330             // set of jumps.
1331             //
1332             // In the non-simple forms, successful alternative matches must store a
1333             // 'return address' using a DataLabelPtr, used to store the address to jump
1334             // to when backtracking, to get to the code for the appropriate alternative.
1335             case OpSimpleNestedAlternativeBegin:
1336             case OpNestedAlternativeBegin: {
1337                 PatternTerm* term = op.m_term;
1338                 PatternAlternative* alternative = op.m_alternative;
1339                 PatternDisjunction* disjunction = term->parentheses.disjunction;
1340
1341                 // Calculate how much input we need to check for, and if non-zero check.
1342                 op.m_checkAdjust = alternative->m_minimumSize;
1343                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1344                     op.m_checkAdjust -= disjunction->m_minimumSize;
1345                 if (op.m_checkAdjust)
1346                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1347  
1348                 m_checked += op.m_checkAdjust;
1349                 break;
1350             }
1351             case OpSimpleNestedAlternativeNext:
1352             case OpNestedAlternativeNext: {
1353                 PatternTerm* term = op.m_term;
1354                 PatternAlternative* alternative = op.m_alternative;
1355                 PatternDisjunction* disjunction = term->parentheses.disjunction;
1356
1357                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1358                 if (op.m_op == OpNestedAlternativeNext) {
1359                     unsigned parenthesesFrameLocation = term->frameLocation;
1360                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
1361                     if (term->quantityType != QuantifierFixedCount)
1362                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1363                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1364                 }
1365
1366                 // If we reach here then the last alternative has matched - jump to the
1367                 // End node, to skip over any further alternatives.
1368                 //
1369                 // FIXME: this is logically O(N^2) (though N can be expected to be very
1370                 // small). We could avoid this either by adding an extra jump to the JIT
1371                 // data structures, or by making backtracking code that jumps to Next
1372                 // alternatives are responsible for checking that input is available (if
1373                 // we didn't need to plant the input checks, then m_jumps would be free).
1374                 YarrOp* endOp = &m_ops[op.m_nextOp];
1375                 while (endOp->m_nextOp != notFound) {
1376                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1377                     endOp = &m_ops[endOp->m_nextOp];
1378                 }
1379                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1380                 endOp->m_jumps.append(jump());
1381
1382                 // This is the entry point for the next alternative.
1383                 op.m_reentry = label();
1384
1385                 // Calculate how much input we need to check for, and if non-zero check.
1386                 op.m_checkAdjust = alternative->m_minimumSize;
1387                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1388                     op.m_checkAdjust -= disjunction->m_minimumSize;
1389                 if (op.m_checkAdjust)
1390                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1391
1392                 YarrOp& lastOp = m_ops[op.m_previousOp];
1393                 m_checked -= lastOp.m_checkAdjust;
1394                 m_checked += op.m_checkAdjust;
1395                 break;
1396             }
1397             case OpSimpleNestedAlternativeEnd:
1398             case OpNestedAlternativeEnd: {
1399                 PatternTerm* term = op.m_term;
1400
1401                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1402                 if (op.m_op == OpNestedAlternativeEnd) {
1403                     unsigned parenthesesFrameLocation = term->frameLocation;
1404                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
1405                     if (term->quantityType != QuantifierFixedCount)
1406                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1407                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1408                 }
1409
1410                 // If this set of alternatives contains more than one alternative,
1411                 // then the Next nodes will have planted jumps to the End, and added
1412                 // them to this node's m_jumps list.
1413                 op.m_jumps.link(this);
1414                 op.m_jumps.clear();
1415
1416                 YarrOp& lastOp = m_ops[op.m_previousOp];
1417                 m_checked -= lastOp.m_checkAdjust;
1418                 break;
1419             }
1420
1421             // OpParenthesesSubpatternOnceBegin/End
1422             //
1423             // These nodes support (optionally) capturing subpatterns, that have a
1424             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 
1425             case OpParenthesesSubpatternOnceBegin: {
1426                 PatternTerm* term = op.m_term;
1427                 unsigned parenthesesFrameLocation = term->frameLocation;
1428                 const RegisterID indexTemporary = regT0;
1429                 ASSERT(term->quantityCount == 1);
1430
1431                 // Upon entry to a Greedy quantified set of parenthese store the index.
1432                 // We'll use this for two purposes:
1433                 //  - To indicate which iteration we are on of mathing the remainder of
1434                 //    the expression after the parentheses - the first, including the
1435                 //    match within the parentheses, or the second having skipped over them.
1436                 //  - To check for empty matches, which must be rejected.
1437                 //
1438                 // At the head of a NonGreedy set of parentheses we'll immediately set the
1439                 // value on the stack to -1 (indicating a match skipping the subpattern),
1440                 // and plant a jump to the end. We'll also plant a label to backtrack to
1441                 // to reenter the subpattern later, with a store to set up index on the
1442                 // second iteration.
1443                 //
1444                 // FIXME: for capturing parens, could use the index in the capture array?
1445                 if (term->quantityType == QuantifierGreedy)
1446                     storeToFrame(index, parenthesesFrameLocation);
1447                 else if (term->quantityType == QuantifierNonGreedy) {
1448                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1449                     op.m_jumps.append(jump());
1450                     op.m_reentry = label();
1451                     storeToFrame(index, parenthesesFrameLocation);
1452                 }
1453
1454                 // If the parenthese are capturing, store the starting index value to the
1455                 // captures array, offsetting as necessary.
1456                 //
1457                 // FIXME: could avoid offsetting this value in JIT code, apply
1458                 // offsets only afterwards, at the point the results array is
1459                 // being accessed.
1460                 if (term->capture()) {
1461                     int offsetId = term->parentheses.subpatternId << 1;
1462                     int inputOffset = term->inputPosition - m_checked;
1463                     if (term->quantityType == QuantifierFixedCount)
1464                         inputOffset -= term->parentheses.disjunction->m_minimumSize;
1465                     if (inputOffset) {
1466                         move(index, indexTemporary);
1467                         add32(Imm32(inputOffset), indexTemporary);
1468                         store32(indexTemporary, Address(output, offsetId * sizeof(int)));
1469                     } else
1470                         store32(index, Address(output, offsetId * sizeof(int)));
1471                 }
1472                 break;
1473             }
1474             case OpParenthesesSubpatternOnceEnd: {
1475                 PatternTerm* term = op.m_term;
1476                 unsigned parenthesesFrameLocation = term->frameLocation;
1477                 const RegisterID indexTemporary = regT0;
1478                 ASSERT(term->quantityCount == 1);
1479
1480                 // For Greedy/NonGreedy quantified parentheses, we must reject zero length
1481                 // matches. If the minimum size is know to be non-zero we need not check.
1482                 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
1483                     op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
1484
1485                 // If the parenthese are capturing, store the ending index value to the
1486                 // captures array, offsetting as necessary.
1487                 //
1488                 // FIXME: could avoid offsetting this value in JIT code, apply
1489                 // offsets only afterwards, at the point the results array is
1490                 // being accessed.
1491                 if (term->capture()) {
1492                     int offsetId = (term->parentheses.subpatternId << 1) + 1;
1493                     int inputOffset = term->inputPosition - m_checked;
1494                     if (inputOffset) {
1495                         move(index, indexTemporary);
1496                         add32(Imm32(inputOffset), indexTemporary);
1497                         store32(indexTemporary, Address(output, offsetId * sizeof(int)));
1498                     } else
1499                         store32(index, Address(output, offsetId * sizeof(int)));
1500                 }
1501
1502                 // If the parentheses are quantified Greedy then add a label to jump back
1503                 // to if get a failed match from after the parentheses. For NonGreedy
1504                 // parentheses, link the jump from before the subpattern to here.
1505                 if (term->quantityType == QuantifierGreedy)
1506                     op.m_reentry = label();
1507                 else if (term->quantityType == QuantifierNonGreedy) {
1508                     YarrOp& beginOp = m_ops[op.m_previousOp];
1509                     beginOp.m_jumps.link(this);
1510                 }
1511                 break;
1512             }
1513
1514             // OpParenthesesSubpatternTerminalBegin/End
1515             case OpParenthesesSubpatternTerminalBegin: {
1516                 PatternTerm* term = op.m_term;
1517                 ASSERT(term->quantityType == QuantifierGreedy);
1518                 ASSERT(term->quantityCount == quantifyInfinite);
1519                 ASSERT(!term->capture());
1520
1521                 // Upon entry set a label to loop back to.
1522                 op.m_reentry = label();
1523
1524                 // Store the start index of the current match; we need to reject zero
1525                 // length matches.
1526                 storeToFrame(index, term->frameLocation);
1527                 break;
1528             }
1529             case OpParenthesesSubpatternTerminalEnd: {
1530                 PatternTerm* term = op.m_term;
1531
1532                 // Check for zero length matches - if the match is non-zero, then we
1533                 // can accept it & loop back up to the head of the subpattern.
1534                 YarrOp& beginOp = m_ops[op.m_previousOp];
1535                 branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry);
1536
1537                 // Reject the match - backtrack back into the subpattern.
1538                 op.m_jumps.append(jump());
1539
1540                 // This is the entry point to jump to when we stop matching - we will
1541                 // do so once the subpattern cannot match any more.
1542                 op.m_reentry = label();
1543                 break;
1544             }
1545
1546             // OpParentheticalAssertionBegin/End
1547             case OpParentheticalAssertionBegin: {
1548                 PatternTerm* term = op.m_term;
1549
1550                 // Store the current index - assertions should not update index, so
1551                 // we will need to restore it upon a successful match.
1552                 unsigned parenthesesFrameLocation = term->frameLocation;
1553                 storeToFrame(index, parenthesesFrameLocation);
1554
1555                 // Check 
1556                 op.m_checkAdjust = m_checked - term->inputPosition;
1557                 if (op.m_checkAdjust)
1558                     sub32(Imm32(op.m_checkAdjust), index);
1559
1560                 m_checked -= op.m_checkAdjust;
1561                 break;
1562             }
1563             case OpParentheticalAssertionEnd: {
1564                 PatternTerm* term = op.m_term;
1565
1566                 // Restore the input index value.
1567                 unsigned parenthesesFrameLocation = term->frameLocation;
1568                 loadFromFrame(parenthesesFrameLocation, index);
1569
1570                 // If inverted, a successful match of the assertion must be treated
1571                 // as a failure, so jump to backtracking.
1572                 if (term->invert()) {
1573                     op.m_jumps.append(jump());
1574                     op.m_reentry = label();
1575                 }
1576
1577                 YarrOp& lastOp = m_ops[op.m_previousOp];
1578                 m_checked += lastOp.m_checkAdjust;
1579                 break;
1580             }
1581
1582             case OpMatchFailed:
1583                 if (m_pattern.m_body->m_callFrameSize)
1584                     addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1585                 move(TrustedImm32(-1), returnRegister);
1586                 generateReturn();
1587                 break;
1588             }
1589
1590             ++opIndex;
1591         } while (opIndex < m_ops.size());
1592     }
1593
1594     void backtrack()
1595     {
1596         // Backwards generate the backtracking code.
1597         size_t opIndex = m_ops.size();
1598         ASSERT(opIndex);
1599
1600         do {
1601             --opIndex;
1602             YarrOp& op = m_ops[opIndex];
1603             switch (op.m_op) {
1604
1605             case OpTerm:
1606                 backtrackTerm(opIndex);
1607                 break;
1608
1609             // OpBodyAlternativeBegin/Next/End
1610             //
1611             // For each Begin/Next node representing an alternative, we need to decide what to do
1612             // in two circumstances:
1613             //  - If we backtrack back into this node, from within the alternative.
1614             //  - If the input check at the head of the alternative fails (if this exists).
1615             //
1616             // We treat these two cases differently since in the former case we have slightly
1617             // more information - since we are backtracking out of a prior alternative we know
1618             // that at least enough input was available to run it. For example, given the regular
1619             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1620             // character match of 'a'), then we need not perform an additional input availability
1621             // check before running the second alternative.
1622             //
1623             // Backtracking required differs for the last alternative, which in the case of the
1624             // repeating set of alternatives must loop. The code generated for the last alternative
1625             // will also be used to handle all input check failures from any prior alternatives -
1626             // these require similar functionality, in seeking the next available alternative for
1627             // which there is sufficient input.
1628             //
1629             // Since backtracking of all other alternatives simply requires us to link backtracks
1630             // to the reentry point for the subsequent alternative, we will only be generating any
1631             // code when backtracking the last alternative.
1632             case OpBodyAlternativeBegin:
1633             case OpBodyAlternativeNext: {
1634                 PatternAlternative* alternative = op.m_alternative;
1635
1636                 if (op.m_op == OpBodyAlternativeNext) {
1637                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1638                     m_checked += priorAlternative->m_minimumSize;
1639                 }
1640                 m_checked -= alternative->m_minimumSize;
1641
1642                 // Is this the last alternative? If not, then if we backtrack to this point we just
1643                 // need to jump to try to match the next alternative.
1644                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
1645                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
1646                     break;
1647                 }
1648                 YarrOp& endOp = m_ops[op.m_nextOp];
1649
1650                 YarrOp* beginOp = &op;
1651                 while (beginOp->m_op != OpBodyAlternativeBegin) {
1652                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
1653                     beginOp = &m_ops[beginOp->m_previousOp];
1654                 }
1655
1656                 bool onceThrough = endOp.m_nextOp == notFound;
1657
1658                 // First, generate code to handle cases where we backtrack out of an attempted match
1659                 // of the last alternative. If this is a 'once through' set of alternatives then we
1660                 // have nothing to do - link this straight through to the End.
1661                 if (onceThrough)
1662                     m_backtrackingState.linkTo(endOp.m_reentry, this);
1663                 else {
1664                     // If we don't need to move the input poistion, and the pattern has a fixed size
1665                     // (in which case we omit the store of the start index until the pattern has matched)
1666                     // then we can just link the backtrack out of the last alternative straight to the
1667                     // head of the first alternative.
1668                     if (m_pattern.m_body->m_hasFixedSize
1669                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
1670                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
1671                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
1672                     else {
1673                         // We need to generate a trampoline of code to execute before looping back
1674                         // around to the first alternative.
1675                         m_backtrackingState.link(this);
1676
1677                         // If the pattern size is not fixed, then store the start index, for use if we match.
1678                         if (!m_pattern.m_body->m_hasFixedSize) {
1679                             if (alternative->m_minimumSize == 1)
1680                                 store32(index, Address(output));
1681                             else {
1682                                 move(index, regT0);
1683                                 if (alternative->m_minimumSize)
1684                                     sub32(Imm32(alternative->m_minimumSize - 1), regT0);
1685                                 else
1686                                     add32(Imm32(1), regT0);
1687                                 store32(regT0, Address(output));
1688                             }
1689                         }
1690
1691                         // Generate code to loop. Check whether the last alternative is longer than the
1692                         // first (e.g. /a|xy/ or /a|xyz/).
1693                         if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
1694                             // We want to loop, and increment input position. If the delta is 1, it is
1695                             // already correctly incremented, if more than one then decrement as appropriate.
1696                             unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
1697                             ASSERT(delta);
1698                             if (delta != 1)
1699                                 sub32(Imm32(delta - 1), index);
1700                             jump(beginOp->m_reentry);
1701                         } else {
1702                             // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1703                             // be sufficent input available to handle this, so just fall through.
1704                             unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
1705                             if (delta != 0xFFFFFFFFu) {
1706                                 // We need to check input because we are incrementing the input.
1707                                 add32(Imm32(delta + 1), index);
1708                                 checkInput().linkTo(beginOp->m_reentry, this);
1709                             }
1710                         }
1711                     }
1712                 }
1713
1714                 // We can reach this point in the code in two ways:
1715                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
1716                 //    last alternative, and did not have sufficent input to run the first).
1717                 //  - We will loop back up to the following label when a releating alternative loops,
1718                 //    following a failed input check.
1719                 //
1720                 // Either way, we have just failed the input check for the first alternative.
1721                 Label firstInputCheckFailed(this);
1722
1723                 // Generate code to handle input check failures from alternatives except the last.
1724                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
1725                 // nextOp is the alternative we will be attempting to reenter into.
1726                 // 
1727                 // We will link input check failures from the forwards matching path back to the code
1728                 // that can handle them.
1729                 YarrOp* prevOp = beginOp;
1730                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
1731                 while (nextOp->m_op != OpBodyAlternativeEnd) {
1732                     prevOp->m_jumps.link(this);
1733
1734                     // We only get here if an input check fails, it is only worth checking again
1735                     // if the next alternative has a minimum size less than the last.
1736                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
1737                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1738                         // subtract delta back out, and reduce this code. Should performance test
1739                         // the benefit of this.
1740                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
1741                         sub32(Imm32(delta), index);
1742                         Jump fail = jumpIfNoAvailableInput();
1743                         add32(Imm32(delta), index);
1744                         jump(nextOp->m_reentry);
1745                         fail.link(this);
1746                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
1747                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
1748                     prevOp = nextOp;
1749                     nextOp = &m_ops[nextOp->m_nextOp];
1750                 }
1751
1752                 // We fall through to here if there is insufficient input to run the last alternative.
1753
1754                 // If there is insufficient input to run the last alternative, then for 'once through'
1755                 // alternatives we are done - just jump back up into the forwards matching path at the End.
1756                 if (onceThrough) {
1757                     op.m_jumps.linkTo(endOp.m_reentry, this);
1758                     jump(endOp.m_reentry);
1759                     break;
1760                 }
1761
1762                 // For repeating alternatives, link any input check failure from the last alternative to
1763                 // this point.
1764                 op.m_jumps.link(this);
1765
1766                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
1767
1768                 // Check for cases where input position is already incremented by 1 for the last
1769                 // alternative (this is particularly useful where the minimum size of the body
1770                 // disjunction is 0, e.g. /a*|b/).
1771                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
1772                     // index is already incremented by 1, so just store it now!
1773                     store32(index, Address(output));
1774                     needsToUpdateMatchStart = false;
1775                 }
1776
1777                 // Check whether there is sufficient input to loop. Increment the input position by
1778                 // one, and check. Also add in the minimum disjunction size before checking - there
1779                 // is no point in looping if we're just going to fail all the input checks around
1780                 // the next iteration.
1781                 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
1782                 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
1783                     // If the last alternative had the same minimum size as the disjunction,
1784                     // just simply increment input pos by 1, no adjustment based on minimum size.
1785                     add32(Imm32(1), index);
1786                 } else {
1787                     // If the minumum for the last alternative was one greater than than that
1788                     // for the disjunction, we're already progressed by 1, nothing to do!
1789                     unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
1790                     if (delta)
1791                         sub32(Imm32(delta), index);
1792                 }
1793                 Jump matchFailed = jumpIfNoAvailableInput();
1794
1795                 if (needsToUpdateMatchStart) {
1796                     if (!m_pattern.m_body->m_minimumSize)
1797                         store32(index, Address(output));
1798                     else {
1799                         move(index, regT0);
1800                         sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
1801                         store32(regT0, Address(output));
1802                     }
1803                 }
1804
1805                 // Calculate how much more input the first alternative requires than the minimum
1806                 // for the body as a whole. If no more is needed then we dont need an additional
1807                 // input check here - jump straight back up to the start of the first alternative.
1808                 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
1809                     jump(beginOp->m_reentry);
1810                 else {
1811                     if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
1812                         add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
1813                     else
1814                         sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
1815                     checkInput().linkTo(beginOp->m_reentry, this);
1816                     jump(firstInputCheckFailed);
1817                 }
1818
1819                 // We jump to here if we iterate to the point that there is insufficient input to
1820                 // run any matches, and need to return a failure state from JIT code.
1821                 matchFailed.link(this);
1822
1823                 if (m_pattern.m_body->m_callFrameSize)
1824                     addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1825                 move(TrustedImm32(-1), returnRegister);
1826                 generateReturn();
1827                 break;
1828             }
1829             case OpBodyAlternativeEnd: {
1830                 // We should never backtrack back into a body disjunction.
1831                 ASSERT(m_backtrackingState.isEmpty());
1832
1833                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1834                 m_checked += priorAlternative->m_minimumSize;
1835                 break;
1836             }
1837
1838             // OpSimpleNestedAlternativeBegin/Next/End
1839             // OpNestedAlternativeBegin/Next/End
1840             //
1841             // Generate code for when we backtrack back out of an alternative into
1842             // a Begin or Next node, or when the entry input count check fails. If
1843             // there are more alternatives we need to jump to the next alternative,
1844             // if not we backtrack back out of the current set of parentheses.
1845             //
1846             // In the case of non-simple nested assertions we need to also link the
1847             // 'return address' appropriately to backtrack back out into the correct
1848             // alternative.
1849             case OpSimpleNestedAlternativeBegin:
1850             case OpSimpleNestedAlternativeNext:
1851             case OpNestedAlternativeBegin:
1852             case OpNestedAlternativeNext: {
1853                 YarrOp& nextOp = m_ops[op.m_nextOp];
1854                 bool isBegin = op.m_previousOp == notFound;
1855                 bool isLastAlternative = nextOp.m_nextOp == notFound;
1856                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
1857                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
1858
1859                 // Treat an input check failure the same as a failed match.
1860                 m_backtrackingState.append(op.m_jumps);
1861
1862                 // Set the backtracks to jump to the appropriate place. We may need
1863                 // to link the backtracks in one of three different way depending on
1864                 // the type of alternative we are dealing with:
1865                 //  - A single alternative, with no simplings.
1866                 //  - The last alternative of a set of two or more.
1867                 //  - An alternative other than the last of a set of two or more.
1868                 //
1869                 // In the case of a single alternative on its own, we don't need to
1870                 // jump anywhere - if the alternative fails to match we can just
1871                 // continue to backtrack out of the parentheses without jumping.
1872                 //
1873                 // In the case of the last alternative in a set of more than one, we
1874                 // need to jump to return back out to the beginning. We'll do so by
1875                 // adding a jump to the End node's m_jumps list, and linking this
1876                 // when we come to generate the Begin node. For alternatives other
1877                 // than the last, we need to jump to the next alternative.
1878                 //
1879                 // If the alternative had adjusted the input position we must link
1880                 // backtracking to here, correct, and then jump on. If not we can
1881                 // link the backtracks directly to their destination.
1882                 if (op.m_checkAdjust) {
1883                     // Handle the cases where we need to link the backtracks here.
1884                     m_backtrackingState.link(this);
1885                     sub32(Imm32(op.m_checkAdjust), index);
1886                     if (!isLastAlternative) {
1887                         // An alternative that is not the last should jump to its successor.
1888                         jump(nextOp.m_reentry);
1889                     } else if (!isBegin) {
1890                         // The last of more than one alternatives must jump back to the begnning.
1891                         nextOp.m_jumps.append(jump());
1892                     } else {
1893                         // A single alternative on its own can fall through.
1894                         m_backtrackingState.fallthrough();
1895                     }
1896                 } else {
1897                     // Handle the cases where we can link the backtracks directly to their destinations.
1898                     if (!isLastAlternative) {
1899                         // An alternative that is not the last should jump to its successor.
1900                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
1901                     } else if (!isBegin) {
1902                         // The last of more than one alternatives must jump back to the begnning.
1903                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
1904                     }
1905                     // In the case of a single alternative on its own do nothing - it can fall through.
1906                 }
1907
1908                 // At this point we've handled the backtracking back into this node.
1909                 // Now link any backtracks that need to jump to here.
1910
1911                 // For non-simple alternatives, link the alternative's 'return address'
1912                 // so that we backtrack back out into the previous alternative.
1913                 if (op.m_op == OpNestedAlternativeNext)
1914                     m_backtrackingState.append(op.m_returnAddress);
1915
1916                 // If there is more than one alternative, then the last alternative will
1917                 // have planted a jump to be linked to the end. This jump was added to the
1918                 // End node's m_jumps list. If we are back at the beginning, link it here.
1919                 if (isBegin) {
1920                     YarrOp* endOp = &m_ops[op.m_nextOp];
1921                     while (endOp->m_nextOp != notFound) {
1922                         ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1923                         endOp = &m_ops[endOp->m_nextOp];
1924                     }
1925                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1926                     m_backtrackingState.append(endOp->m_jumps);
1927                 }
1928
1929                 if (!isBegin) {
1930                     YarrOp& lastOp = m_ops[op.m_previousOp];
1931                     m_checked += lastOp.m_checkAdjust;
1932                 }
1933                 m_checked -= op.m_checkAdjust;
1934                 break;
1935             }
1936             case OpSimpleNestedAlternativeEnd:
1937             case OpNestedAlternativeEnd: {
1938                 PatternTerm* term = op.m_term;
1939
1940                 // If we backtrack into the end of a simple subpattern do nothing;
1941                 // just continue through into the last alternative. If we backtrack
1942                 // into the end of a non-simple set of alterntives we need to jump
1943                 // to the backtracking return address set up during generation.
1944                 if (op.m_op == OpNestedAlternativeEnd) {
1945                     m_backtrackingState.link(this);
1946
1947                     // Plant a jump to the return address.
1948                     unsigned parenthesesFrameLocation = term->frameLocation;
1949                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
1950                     if (term->quantityType != QuantifierFixedCount)
1951                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1952                     loadFromFrameAndJump(alternativeFrameLocation);
1953
1954                     // Link the DataLabelPtr associated with the end of the last
1955                     // alternative to this point.
1956                     m_backtrackingState.append(op.m_returnAddress);
1957                 }
1958
1959                 YarrOp& lastOp = m_ops[op.m_previousOp];
1960                 m_checked += lastOp.m_checkAdjust;
1961                 break;
1962             }
1963
1964             // OpParenthesesSubpatternOnceBegin/End
1965             //
1966             // When we are backtracking back out of a capturing subpattern we need
1967             // to clear the start index in the matches output array, to record that
1968             // this subpattern has not been captured.
1969             //
1970             // When backtracking back out of a Greedy quantified subpattern we need
1971             // to catch this, and try running the remainder of the alternative after
1972             // the subpattern again, skipping the parentheses.
1973             //
1974             // Upon backtracking back into a quantified set of parentheses we need to
1975             // check whether we were currently skipping the subpattern. If not, we
1976             // can backtrack into them, if we were we need to either backtrack back
1977             // out of the start of the parentheses, or jump back to the forwards
1978             // matching start, depending of whether the match is Greedy or NonGreedy.
1979             case OpParenthesesSubpatternOnceBegin: {
1980                 PatternTerm* term = op.m_term;
1981                 ASSERT(term->quantityCount == 1);
1982
1983                 // We only need to backtrack to thispoint if capturing or greedy.
1984                 if (term->capture() || term->quantityType == QuantifierGreedy) {
1985                     m_backtrackingState.link(this);
1986
1987                     // If capturing, clear the capture (we only need to reset start).
1988                     if (term->capture())
1989                         store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int)));
1990
1991                     // If Greedy, jump to the end.
1992                     if (term->quantityType == QuantifierGreedy) {
1993                         // Clear the flag in the stackframe indicating we ran through the subpattern.
1994                         unsigned parenthesesFrameLocation = term->frameLocation;
1995                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1996                         // Jump to after the parentheses, skipping the subpattern.
1997                         jump(m_ops[op.m_nextOp].m_reentry);
1998                         // A backtrack from after the parentheses, when skipping the subpattern,
1999                         // will jump back to here.
2000                         op.m_jumps.link(this);
2001                     }
2002
2003                     m_backtrackingState.fallthrough();
2004                 }
2005                 break;
2006             }
2007             case OpParenthesesSubpatternOnceEnd: {
2008                 PatternTerm* term = op.m_term;
2009
2010                 if (term->quantityType != QuantifierFixedCount) {
2011                     m_backtrackingState.link(this);
2012
2013                     // Check whether we should backtrack back into the parentheses, or if we
2014                     // are currently in a state where we had skipped over the subpattern
2015                     // (in which case the flag value on the stack will be -1).
2016                     unsigned parenthesesFrameLocation = term->frameLocation;
2017                     Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
2018
2019                     if (term->quantityType == QuantifierGreedy) {
2020                         // For Greedy parentheses, we skip after having already tried going
2021                         // through the subpattern, so if we get here we're done.
2022                         YarrOp& beginOp = m_ops[op.m_previousOp];
2023                         beginOp.m_jumps.append(hadSkipped);
2024                     } else {
2025                         // For NonGreedy parentheses, we try skipping the subpattern first,
2026                         // so if we get here we need to try running through the subpattern
2027                         // next. Jump back to the start of the parentheses in the forwards
2028                         // matching path.
2029                         ASSERT(term->quantityType == QuantifierNonGreedy);
2030                         YarrOp& beginOp = m_ops[op.m_previousOp];
2031                         hadSkipped.linkTo(beginOp.m_reentry, this);
2032                     }
2033
2034                     m_backtrackingState.fallthrough();
2035                 }
2036
2037                 m_backtrackingState.append(op.m_jumps);
2038                 break;
2039             }
2040
2041             // OpParenthesesSubpatternTerminalBegin/End
2042             //
2043             // Terminal subpatterns will always match - there is nothing after them to
2044             // force a backtrack, and they have a minimum count of 0, and as such will
2045             // always produce an acceptable result.
2046             case OpParenthesesSubpatternTerminalBegin: {
2047                 // We will backtrack to this point once the subpattern cannot match any
2048                 // more. Since no match is accepted as a successful match (we are Greedy
2049                 // quantified with a minimum of zero) jump back to the forwards matching
2050                 // path at the end.
2051                 YarrOp& endOp = m_ops[op.m_nextOp];
2052                 m_backtrackingState.linkTo(endOp.m_reentry, this);
2053                 break;
2054             }
2055             case OpParenthesesSubpatternTerminalEnd:
2056                 // We should never be backtracking to here (hence the 'terminal' in the name).
2057                 ASSERT(m_backtrackingState.isEmpty());
2058                 m_backtrackingState.append(op.m_jumps);
2059                 break;
2060
2061             // OpParentheticalAssertionBegin/End
2062             case OpParentheticalAssertionBegin: {
2063                 PatternTerm* term = op.m_term;
2064                 YarrOp& endOp = m_ops[op.m_nextOp];
2065
2066                 // We need to handle the backtracks upon backtracking back out
2067                 // of a parenthetical assertion if either we need to correct
2068                 // the input index, or the assertion was inverted.
2069                 if (op.m_checkAdjust || term->invert()) {
2070                      m_backtrackingState.link(this);
2071
2072                     if (op.m_checkAdjust)
2073                         add32(Imm32(op.m_checkAdjust), index);
2074
2075                     // In an inverted assertion failure to match the subpattern
2076                     // is treated as a successful match - jump to the end of the
2077                     // subpattern. We already have adjusted the input position
2078                     // back to that before the assertion, which is correct.
2079                     if (term->invert())
2080                         jump(endOp.m_reentry);
2081
2082                     m_backtrackingState.fallthrough();
2083                 }
2084
2085                 // The End node's jump list will contain any backtracks into
2086                 // the end of the assertion. Also, if inverted, we will have
2087                 // added the failure caused by a successful match to this.
2088                 m_backtrackingState.append(endOp.m_jumps);
2089
2090                 m_checked += op.m_checkAdjust;
2091                 break;
2092             }
2093             case OpParentheticalAssertionEnd: {
2094                 // FIXME: We should really be clearing any nested subpattern
2095                 // matches on bailing out from after the pattern. Firefox has
2096                 // this bug too (presumably because they use YARR!)
2097
2098                 // Never backtrack into an assertion; later failures bail to before the begin.
2099                 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2100
2101                 YarrOp& lastOp = m_ops[op.m_previousOp];
2102                 m_checked -= lastOp.m_checkAdjust;
2103                 break;
2104             }
2105
2106             case OpMatchFailed:
2107                 break;
2108             }
2109
2110         } while (opIndex);
2111     }
2112
2113     // Compilation methods:
2114     // ====================
2115
2116     // opCompileParenthesesSubpattern
2117     // Emits ops for a subpattern (set of parentheses). These consist
2118     // of a set of alternatives wrapped in an outer set of nodes for
2119     // the parentheses.
2120     // Supported types of parentheses are 'Once' (quantityCount == 1)
2121     // and 'Terminal' (non-capturing parentheses quantified as greedy
2122     // and infinite).
2123     // Alternatives will use the 'Simple' set of ops if either the
2124     // subpattern is terminal (in which case we will never need to
2125     // backtrack), or if the subpattern only contains one alternative.
2126     void opCompileParenthesesSubpattern(PatternTerm* term)
2127     {
2128         YarrOpCode parenthesesBeginOpCode;
2129         YarrOpCode parenthesesEndOpCode;
2130         YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
2131         YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
2132         YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
2133
2134         // We can currently only compile quantity 1 subpatterns that are
2135         // not copies. We generate a copy in the case of a range quantifier,
2136         // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2137         // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
2138         // comes where the subpattern is capturing, in which case we would
2139         // need to restore the capture from the first subpattern upon a
2140         // failure in the second.
2141         if (term->quantityCount == 1 && !term->parentheses.isCopy) {
2142             // Select the 'Once' nodes.
2143             parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
2144             parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
2145
2146             // If there is more than one alternative we cannot use the 'simple' nodes.
2147             if (term->parentheses.disjunction->m_alternatives.size() != 1) {
2148                 alternativeBeginOpCode = OpNestedAlternativeBegin;
2149                 alternativeNextOpCode = OpNestedAlternativeNext;
2150                 alternativeEndOpCode = OpNestedAlternativeEnd;
2151             }
2152         } else if (term->parentheses.isTerminal) {
2153             // Select the 'Terminal' nodes.
2154             parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
2155             parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
2156         } else {
2157             // This subpattern is not supported by the JIT.
2158             m_shouldFallBack = true;
2159             return;
2160         }
2161
2162         size_t parenBegin = m_ops.size();
2163         m_ops.append(parenthesesBeginOpCode);
2164
2165         m_ops.append(alternativeBeginOpCode);
2166         m_ops.last().m_previousOp = notFound;
2167         m_ops.last().m_term = term;
2168         Vector<PatternAlternative*>& alternatives =  term->parentheses.disjunction->m_alternatives;
2169         for (unsigned i = 0; i < alternatives.size(); ++i) {
2170             size_t lastOpIndex = m_ops.size() - 1;
2171
2172             PatternAlternative* nestedAlternative = alternatives[i];
2173             opCompileAlternative(nestedAlternative);
2174
2175             size_t thisOpIndex = m_ops.size();
2176             m_ops.append(YarrOp(alternativeNextOpCode));
2177
2178             YarrOp& lastOp = m_ops[lastOpIndex];
2179             YarrOp& thisOp = m_ops[thisOpIndex];
2180
2181             lastOp.m_alternative = nestedAlternative;
2182             lastOp.m_nextOp = thisOpIndex;
2183             thisOp.m_previousOp = lastOpIndex;
2184             thisOp.m_term = term;
2185         }
2186         YarrOp& lastOp = m_ops.last();
2187         ASSERT(lastOp.m_op == alternativeNextOpCode);
2188         lastOp.m_op = alternativeEndOpCode;
2189         lastOp.m_alternative = 0;
2190         lastOp.m_nextOp = notFound;
2191
2192         size_t parenEnd = m_ops.size();
2193         m_ops.append(parenthesesEndOpCode);
2194
2195         m_ops[parenBegin].m_term = term;
2196         m_ops[parenBegin].m_previousOp = notFound;
2197         m_ops[parenBegin].m_nextOp = parenEnd;
2198         m_ops[parenEnd].m_term = term;
2199         m_ops[parenEnd].m_previousOp = parenBegin;
2200         m_ops[parenEnd].m_nextOp = notFound;
2201     }
2202
2203     // opCompileParentheticalAssertion
2204     // Emits ops for a parenthetical assertion. These consist of an
2205     // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2206     // the alternatives, with these wrapped by an outer pair of
2207     // OpParentheticalAssertionBegin/End nodes.
2208     // We can always use the OpSimpleNestedAlternative nodes in the
2209     // case of parenthetical assertions since these only ever match
2210     // once, and will never backtrack back into the assertion.
2211     void opCompileParentheticalAssertion(PatternTerm* term)
2212     {
2213         size_t parenBegin = m_ops.size();
2214         m_ops.append(OpParentheticalAssertionBegin);
2215
2216         m_ops.append(OpSimpleNestedAlternativeBegin);
2217         m_ops.last().m_previousOp = notFound;
2218         m_ops.last().m_term = term;
2219         Vector<PatternAlternative*>& alternatives =  term->parentheses.disjunction->m_alternatives;
2220         for (unsigned i = 0; i < alternatives.size(); ++i) {
2221             size_t lastOpIndex = m_ops.size() - 1;
2222
2223             PatternAlternative* nestedAlternative = alternatives[i];
2224             opCompileAlternative(nestedAlternative);
2225
2226             size_t thisOpIndex = m_ops.size();
2227             m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2228
2229             YarrOp& lastOp = m_ops[lastOpIndex];
2230             YarrOp& thisOp = m_ops[thisOpIndex];
2231
2232             lastOp.m_alternative = nestedAlternative;
2233             lastOp.m_nextOp = thisOpIndex;
2234             thisOp.m_previousOp = lastOpIndex;
2235             thisOp.m_term = term;
2236         }
2237         YarrOp& lastOp = m_ops.last();
2238         ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2239         lastOp.m_op = OpSimpleNestedAlternativeEnd;
2240         lastOp.m_alternative = 0;
2241         lastOp.m_nextOp = notFound;
2242
2243         size_t parenEnd = m_ops.size();
2244         m_ops.append(OpParentheticalAssertionEnd);
2245
2246         m_ops[parenBegin].m_term = term;
2247         m_ops[parenBegin].m_previousOp = notFound;
2248         m_ops[parenBegin].m_nextOp = parenEnd;
2249         m_ops[parenEnd].m_term = term;
2250         m_ops[parenEnd].m_previousOp = parenBegin;
2251         m_ops[parenEnd].m_nextOp = notFound;
2252     }
2253
2254     // opCompileAlternative
2255     // Called to emit nodes for all terms in an alternative.
2256     void opCompileAlternative(PatternAlternative* alternative)
2257     {
2258         optimizeAlternative(alternative);
2259
2260         for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2261             PatternTerm* term = &alternative->m_terms[i];
2262
2263             switch (term->type) {
2264             case PatternTerm::TypeParenthesesSubpattern:
2265                 opCompileParenthesesSubpattern(term);
2266                 break;
2267
2268             case PatternTerm::TypeParentheticalAssertion:
2269                 opCompileParentheticalAssertion(term);
2270                 break;
2271
2272             default:
2273                 m_ops.append(term);
2274             }
2275         }
2276     }
2277
2278     // opCompileBody
2279     // This method compiles the body disjunction of the regular expression.
2280     // The body consists of two sets of alternatives - zero or more 'once
2281     // through' (BOL anchored) alternatives, followed by zero or more
2282     // repeated alternatives.
2283     // For each of these two sets of alteratives, if not empty they will be
2284     // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
2285     // 'begin' node referencing the first alternative, and 'next' nodes
2286     // referencing any further alternatives. The begin/next/end nodes are
2287     // linked together in a doubly linked list. In the case of repeating
2288     // alternatives, the end node is also linked back to the beginning.
2289     // If no repeating alternatives exist, then a OpMatchFailed node exists
2290     // to return the failing result.
2291     void opCompileBody(PatternDisjunction* disjunction)
2292     {
2293         Vector<PatternAlternative*>& alternatives =  disjunction->m_alternatives;
2294         size_t currentAlternativeIndex = 0;
2295
2296         // Emit the 'once through' alternatives.
2297         if (alternatives.size() && alternatives[0]->onceThrough()) {
2298             m_ops.append(YarrOp(OpBodyAlternativeBegin));
2299             m_ops.last().m_previousOp = notFound;
2300
2301             do {
2302                 size_t lastOpIndex = m_ops.size() - 1;
2303                 PatternAlternative* alternative = alternatives[currentAlternativeIndex];
2304                 opCompileAlternative(alternative);
2305
2306                 size_t thisOpIndex = m_ops.size();
2307                 m_ops.append(YarrOp(OpBodyAlternativeNext));
2308
2309                 YarrOp& lastOp = m_ops[lastOpIndex];
2310                 YarrOp& thisOp = m_ops[thisOpIndex];
2311
2312                 lastOp.m_alternative = alternative;
2313                 lastOp.m_nextOp = thisOpIndex;
2314                 thisOp.m_previousOp = lastOpIndex;
2315                 
2316                 ++currentAlternativeIndex;
2317             } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
2318
2319             YarrOp& lastOp = m_ops.last();
2320
2321             ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2322             lastOp.m_op = OpBodyAlternativeEnd;
2323             lastOp.m_alternative = 0;
2324             lastOp.m_nextOp = notFound;
2325         }
2326
2327         if (currentAlternativeIndex == alternatives.size()) {
2328             m_ops.append(YarrOp(OpMatchFailed));
2329             return;
2330         }
2331
2332         // Emit the repeated alternatives.
2333         size_t repeatLoop = m_ops.size();
2334         m_ops.append(YarrOp(OpBodyAlternativeBegin));
2335         m_ops.last().m_previousOp = notFound;
2336         do {
2337             size_t lastOpIndex = m_ops.size() - 1;
2338             PatternAlternative* alternative = alternatives[currentAlternativeIndex];
2339             ASSERT(!alternative->onceThrough());
2340             opCompileAlternative(alternative);
2341
2342             size_t thisOpIndex = m_ops.size();
2343             m_ops.append(YarrOp(OpBodyAlternativeNext));
2344
2345             YarrOp& lastOp = m_ops[lastOpIndex];
2346             YarrOp& thisOp = m_ops[thisOpIndex];
2347
2348             lastOp.m_alternative = alternative;
2349             lastOp.m_nextOp = thisOpIndex;
2350             thisOp.m_previousOp = lastOpIndex;
2351             
2352             ++currentAlternativeIndex;
2353         } while (currentAlternativeIndex < alternatives.size());
2354         YarrOp& lastOp = m_ops.last();
2355         ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2356         lastOp.m_op = OpBodyAlternativeEnd;
2357         lastOp.m_alternative = 0;
2358         lastOp.m_nextOp = repeatLoop;
2359     }
2360
2361     void generateEnter()
2362     {
2363 #if CPU(X86_64)
2364         push(X86Registers::ebp);
2365         move(stackPointerRegister, X86Registers::ebp);
2366         push(X86Registers::ebx);
2367 #elif CPU(X86)
2368         push(X86Registers::ebp);
2369         move(stackPointerRegister, X86Registers::ebp);
2370         // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
2371         push(X86Registers::ebx);
2372         push(X86Registers::edi);
2373         push(X86Registers::esi);
2374         // load output into edi (2 = saved ebp + return address).
2375     #if COMPILER(MSVC)
2376         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
2377         loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
2378         loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
2379         loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
2380     #else
2381         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
2382     #endif
2383 #elif CPU(ARM)
2384         push(ARMRegisters::r4);
2385         push(ARMRegisters::r5);
2386         push(ARMRegisters::r6);
2387 #if CPU(ARM_TRADITIONAL)
2388         push(ARMRegisters::r8); // scratch register
2389 #endif
2390         move(ARMRegisters::r3, output);
2391 #elif CPU(SH4)
2392         push(SH4Registers::r11);
2393         push(SH4Registers::r13);
2394 #elif CPU(MIPS)
2395         // Do nothing.
2396 #endif
2397     }
2398
2399     void generateReturn()
2400     {
2401 #if CPU(X86_64)
2402         pop(X86Registers::ebx);
2403         pop(X86Registers::ebp);
2404 #elif CPU(X86)
2405         pop(X86Registers::esi);
2406         pop(X86Registers::edi);
2407         pop(X86Registers::ebx);
2408         pop(X86Registers::ebp);
2409 #elif CPU(ARM)
2410 #if CPU(ARM_TRADITIONAL)
2411         pop(ARMRegisters::r8); // scratch register
2412 #endif
2413         pop(ARMRegisters::r6);
2414         pop(ARMRegisters::r5);
2415         pop(ARMRegisters::r4);
2416 #elif CPU(SH4)
2417         pop(SH4Registers::r13);
2418         pop(SH4Registers::r11);
2419 #elif CPU(MIPS)
2420         // Do nothing
2421 #endif
2422         ret();
2423     }
2424
2425 public:
2426     YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
2427         : m_pattern(pattern)
2428         , m_charSize(charSize)
2429         , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
2430         , m_shouldFallBack(false)
2431         , m_checked(0)
2432     {
2433     }
2434
2435     void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
2436     {
2437         generateEnter();
2438
2439         if (!m_pattern.m_body->m_hasFixedSize)
2440             store32(index, Address(output));
2441
2442         if (m_pattern.m_body->m_callFrameSize)
2443             subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
2444
2445         // Compile the pattern to the internal 'YarrOp' representation.
2446         opCompileBody(m_pattern.m_body);
2447
2448         // If we encountered anything we can't handle in the JIT code
2449         // (e.g. backreferences) then return early.
2450         if (m_shouldFallBack) {
2451             jitObject.setFallBack(true);
2452             return;
2453         }
2454
2455         generate();
2456         backtrack();
2457
2458         // Link & finalize the code.
2459         LinkBuffer linkBuffer(*globalData, this);
2460         m_backtrackingState.linkDataLabels(linkBuffer);
2461         if (m_charSize == Char8)
2462             jitObject.set8BitCode(linkBuffer.finalizeCode());
2463         else
2464             jitObject.set16BitCode(linkBuffer.finalizeCode());
2465         jitObject.setFallBack(m_shouldFallBack);
2466     }
2467
2468 private:
2469     YarrPattern& m_pattern;
2470
2471     YarrCharSize m_charSize;
2472
2473     Scale m_charScale;
2474
2475     // Used to detect regular expression constructs that are not currently
2476     // supported in the JIT; fall back to the interpreter when this is detected.
2477     bool m_shouldFallBack;
2478
2479     // The regular expression expressed as a linear sequence of operations.
2480     Vector<YarrOp, 128> m_ops;
2481
2482     // This records the current input offset being applied due to the current
2483     // set of alternatives we are nested within. E.g. when matching the
2484     // character 'b' within the regular expression /abc/, we will know that
2485     // the minimum size for the alternative is 3, checked upon entry to the
2486     // alternative, and that 'b' is at offset 1 from the start, and as such
2487     // when matching 'b' we need to apply an offset of -2 to the load.
2488     //
2489     // FIXME: This should go away. Rather than tracking this value throughout
2490     // code generation, we should gather this information up front & store it
2491     // on the YarrOp structure.
2492     int m_checked;
2493
2494     // This class records state whilst generating the backtracking path of code.
2495     BacktrackingState m_backtrackingState;
2496 };
2497
2498 void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject)
2499 {
2500     YarrGenerator(pattern, charSize).compile(globalData, jitObject);
2501 }
2502
2503 int execute(YarrCodeBlock& jitObject, const char* input, unsigned start, unsigned length, int* output)
2504 {
2505     return jitObject.execute(input, start, length, output);
2506 }
2507
2508 int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
2509 {
2510     return jitObject.execute(input, start, length, output);
2511 }
2512
2513 }}
2514
2515 #endif