Bug 39795 - Add support for YARR JIT generation of greedy quantified parens at the...
[WebKit-https.git] / JavaScriptCore / yarr / RegexJIT.cpp
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27 #include "RegexJIT.h"
28
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegexCompiler.h"
34
35 #include "pcre.h" // temporary, remove when fallback is removed.
36
37 #if ENABLE(YARR_JIT)
38
39 using namespace WTF;
40
41 namespace JSC { namespace Yarr {
42
43 class RegexGenerator : private MacroAssembler {
44     friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
45
46 #if CPU(ARM)
47     static const RegisterID input = ARMRegisters::r0;
48     static const RegisterID index = ARMRegisters::r1;
49     static const RegisterID length = ARMRegisters::r2;
50     static const RegisterID output = ARMRegisters::r4;
51
52     static const RegisterID regT0 = ARMRegisters::r5;
53     static const RegisterID regT1 = ARMRegisters::r6;
54
55     static const RegisterID returnRegister = ARMRegisters::r0;
56 #elif CPU(MIPS)
57     static const RegisterID input = MIPSRegisters::a0;
58     static const RegisterID index = MIPSRegisters::a1;
59     static const RegisterID length = MIPSRegisters::a2;
60     static const RegisterID output = MIPSRegisters::a3;
61
62     static const RegisterID regT0 = MIPSRegisters::t4;
63     static const RegisterID regT1 = MIPSRegisters::t5;
64
65     static const RegisterID returnRegister = MIPSRegisters::v0;
66 #elif CPU(X86)
67     static const RegisterID input = X86Registers::eax;
68     static const RegisterID index = X86Registers::edx;
69     static const RegisterID length = X86Registers::ecx;
70     static const RegisterID output = X86Registers::edi;
71
72     static const RegisterID regT0 = X86Registers::ebx;
73     static const RegisterID regT1 = X86Registers::esi;
74
75     static const RegisterID returnRegister = X86Registers::eax;
76 #elif CPU(X86_64)
77     static const RegisterID input = X86Registers::edi;
78     static const RegisterID index = X86Registers::esi;
79     static const RegisterID length = X86Registers::edx;
80     static const RegisterID output = X86Registers::ecx;
81
82     static const RegisterID regT0 = X86Registers::eax;
83     static const RegisterID regT1 = X86Registers::ebx;
84
85     static const RegisterID returnRegister = X86Registers::eax;
86 #endif
87
88     void optimizeAlternative(PatternAlternative* alternative)
89     {
90         if (!alternative->m_terms.size())
91             return;
92
93         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
94             PatternTerm& term = alternative->m_terms[i];
95             PatternTerm& nextTerm = alternative->m_terms[i + 1];
96
97             if ((term.type == PatternTerm::TypeCharacterClass)
98                 && (term.quantityType == QuantifierFixedCount)
99                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
100                 && (nextTerm.quantityType == QuantifierFixedCount)) {
101                 PatternTerm termCopy = term;
102                 alternative->m_terms[i] = nextTerm;
103                 alternative->m_terms[i + 1] = termCopy;
104             }
105         }
106     }
107
108     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
109     {
110         do {
111             // pick which range we're going to generate
112             int which = count >> 1;
113             char lo = ranges[which].begin;
114             char hi = ranges[which].end;
115             
116             // check if there are any ranges or matches below lo.  If not, just jl to failure -
117             // if there is anything else to check, check that first, if it falls through jmp to failure.
118             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
119                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
120                 
121                 // generate code for all ranges before this one
122                 if (which)
123                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
124                 
125                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
126                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
127                     ++*matchIndex;
128                 }
129                 failures.append(jump());
130
131                 loOrAbove.link(this);
132             } else if (which) {
133                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
134
135                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
136                 failures.append(jump());
137
138                 loOrAbove.link(this);
139             } else
140                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
141
142             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
143                 ++*matchIndex;
144
145             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
146             // fall through to here, the value is above hi.
147
148             // shuffle along & loop around if there are any more matches to handle.
149             unsigned next = which + 1;
150             ranges += next;
151             count -= next;
152         } while (count);
153     }
154
155     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
156     {
157         if (charClass->m_table) {
158             ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
159             matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));   
160             return;
161         }
162         Jump unicodeFail;
163         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
164             Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
165         
166             if (charClass->m_matchesUnicode.size()) {
167                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
168                     UChar ch = charClass->m_matchesUnicode[i];
169                     matchDest.append(branch32(Equal, character, Imm32(ch)));
170                 }
171             }
172             
173             if (charClass->m_rangesUnicode.size()) {
174                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
175                     UChar lo = charClass->m_rangesUnicode[i].begin;
176                     UChar hi = charClass->m_rangesUnicode[i].end;
177                     
178                     Jump below = branch32(LessThan, character, Imm32(lo));
179                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
180                     below.link(this);
181                 }
182             }
183
184             unicodeFail = jump();
185             isAscii.link(this);
186         }
187
188         if (charClass->m_ranges.size()) {
189             unsigned matchIndex = 0;
190             JumpList failures; 
191             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
192             while (matchIndex < charClass->m_matches.size())
193                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
194
195             failures.link(this);
196         } else if (charClass->m_matches.size()) {
197             // optimization: gather 'a','A' etc back together, can mask & test once.
198             Vector<char> matchesAZaz;
199
200             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
201                 char ch = charClass->m_matches[i];
202                 if (m_pattern.m_ignoreCase) {
203                     if (isASCIILower(ch)) {
204                         matchesAZaz.append(ch);
205                         continue;
206                     }
207                     if (isASCIIUpper(ch))
208                         continue;
209                 }
210                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
211             }
212
213             if (unsigned countAZaz = matchesAZaz.size()) {
214                 or32(Imm32(32), character);
215                 for (unsigned i = 0; i < countAZaz; ++i)
216                     matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
217             }
218         }
219
220         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
221             unicodeFail.link(this);
222     }
223
224     // Jumps if input not available; will have (incorrectly) incremented already!
225     Jump jumpIfNoAvailableInput(unsigned countToCheck)
226     {
227         add32(Imm32(countToCheck), index);
228         return branch32(Above, index, length);
229     }
230
231     Jump jumpIfAvailableInput(unsigned countToCheck)
232     {
233         add32(Imm32(countToCheck), index);
234         return branch32(BelowOrEqual, index, length);
235     }
236
237     Jump checkInput()
238     {
239         return branch32(BelowOrEqual, index, length);
240     }
241
242     Jump atEndOfInput()
243     {
244         return branch32(Equal, index, length);
245     }
246
247     Jump notAtEndOfInput()
248     {
249         return branch32(NotEqual, index, length);
250     }
251
252     Jump jumpIfCharEquals(UChar ch, int inputPosition)
253     {
254         return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
255     }
256
257     Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
258     {
259         return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
260     }
261
262     void readCharacter(int inputPosition, RegisterID reg)
263     {
264         load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
265     }
266
267     void storeToFrame(RegisterID reg, unsigned frameLocation)
268     {
269         poke(reg, frameLocation);
270     }
271
272     void storeToFrame(Imm32 imm, unsigned frameLocation)
273     {
274         poke(imm, frameLocation);
275     }
276
277     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
278     {
279         return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
280     }
281
282     void loadFromFrame(unsigned frameLocation, RegisterID reg)
283     {
284         peek(reg, frameLocation);
285     }
286
287     void loadFromFrameAndJump(unsigned frameLocation)
288     {
289         jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
290     }
291
292     struct AlternativeBacktrackRecord {
293         DataLabelPtr dataLabel;
294         Label backtrackLocation;
295
296         AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
297             : dataLabel(dataLabel)
298             , backtrackLocation(backtrackLocation)
299         {
300         }
301     };
302
303     struct TermGenerationState {
304         TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
305             : disjunction(disjunction)
306             , checkedTotal(checkedTotal)
307         {
308         }
309
310         void resetAlternative()
311         {
312             isBackTrackGenerated = false;
313             alt = 0;
314         }
315         bool alternativeValid()
316         {
317             return alt < disjunction->m_alternatives.size();
318         }
319         void nextAlternative()
320         {
321             ++alt;
322         }
323         PatternAlternative* alternative()
324         {
325             return disjunction->m_alternatives[alt];
326         }
327
328         void resetTerm()
329         {
330             ASSERT(alternativeValid());
331             t = 0;
332         }
333         bool termValid()
334         {
335             ASSERT(alternativeValid());
336             return t < alternative()->m_terms.size();
337         }
338         void nextTerm()
339         {
340             ASSERT(alternativeValid());
341             ++t;
342         }
343         PatternTerm& term()
344         {
345             ASSERT(alternativeValid());
346             return alternative()->m_terms[t];
347         }
348         bool isLastTerm()
349         {
350             ASSERT(alternativeValid());
351             return (t + 1) == alternative()->m_terms.size();
352         }
353         bool isMainDisjunction()
354         {
355             return !disjunction->m_parent;
356         }
357
358         PatternTerm& lookaheadTerm()
359         {
360             ASSERT(alternativeValid());
361             ASSERT((t + 1) < alternative()->m_terms.size());
362             return alternative()->m_terms[t + 1];
363         }
364         bool isSinglePatternCharacterLookaheadTerm()
365         {
366             ASSERT(alternativeValid());
367             return ((t + 1) < alternative()->m_terms.size())
368                 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
369                 && (lookaheadTerm().quantityType == QuantifierFixedCount)
370                 && (lookaheadTerm().quantityCount == 1);
371         }
372
373         int inputOffset()
374         {
375             return term().inputPosition - checkedTotal;
376         }
377
378         void jumpToBacktrack(Jump jump, MacroAssembler* masm)
379         {
380             if (isBackTrackGenerated)
381                 jump.linkTo(backtrackLabel, masm);
382             else
383                 backTrackJumps.append(jump);
384         }
385         void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
386         {
387             if (isBackTrackGenerated)
388                 jumps.linkTo(backtrackLabel, masm);
389             else
390                 backTrackJumps.append(jumps);
391         }
392         bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
393         {
394             if (isBackTrackGenerated) {
395                 masm->jump(backtrackLabel);
396                 return true;
397             }
398             return false;
399         }
400         void addBacktrackJump(Jump jump)
401         {
402             backTrackJumps.append(jump);
403         }
404         void setBacktrackGenerated(Label label)
405         {
406             isBackTrackGenerated = true;
407             backtrackLabel = label;
408         }
409         void linkAlternativeBacktracks(MacroAssembler* masm)
410         {
411             isBackTrackGenerated = false;
412             backTrackJumps.link(masm);
413         }
414         void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
415         {
416             isBackTrackGenerated = false;
417             backTrackJumps.linkTo(label, masm);
418         }
419         void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
420         {
421             jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
422             if (nestedParenthesesState.isBackTrackGenerated)
423                 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
424         }
425
426         PatternDisjunction* disjunction;
427         int checkedTotal;
428     private:
429         unsigned alt;
430         unsigned t;
431         JumpList backTrackJumps;
432         Label backtrackLabel;
433         bool isBackTrackGenerated;
434     };
435
436     void generateAssertionBOL(TermGenerationState& state)
437     {
438         PatternTerm& term = state.term();
439
440         if (m_pattern.m_multiline) {
441             const RegisterID character = regT0;
442
443             JumpList matchDest;
444             if (!term.inputPosition)
445                 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
446
447             readCharacter(state.inputOffset() - 1, character);
448             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
449             state.jumpToBacktrack(jump(), this);
450
451             matchDest.link(this);
452         } else {
453             // Erk, really should poison out these alternatives early. :-/
454             if (term.inputPosition)
455                 state.jumpToBacktrack(jump(), this);
456             else
457                 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
458         }
459     }
460
461     void generateAssertionEOL(TermGenerationState& state)
462     {
463         PatternTerm& term = state.term();
464
465         if (m_pattern.m_multiline) {
466             const RegisterID character = regT0;
467
468             JumpList matchDest;
469             if (term.inputPosition == state.checkedTotal)
470                 matchDest.append(atEndOfInput());
471
472             readCharacter(state.inputOffset(), character);
473             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
474             state.jumpToBacktrack(jump(), this);
475
476             matchDest.link(this);
477         } else {
478             if (term.inputPosition == state.checkedTotal)
479                 state.jumpToBacktrack(notAtEndOfInput(), this);
480             // Erk, really should poison out these alternatives early. :-/
481             else
482                 state.jumpToBacktrack(jump(), this);
483         }
484     }
485
486     // Also falls though on nextIsNotWordChar.
487     void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
488     {
489         const RegisterID character = regT0;
490         PatternTerm& term = state.term();
491
492         if (term.inputPosition == state.checkedTotal)
493             nextIsNotWordChar.append(atEndOfInput());
494
495         readCharacter(state.inputOffset(), character);
496         matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
497     }
498
499     void generateAssertionWordBoundary(TermGenerationState& state)
500     {
501         const RegisterID character = regT0;
502         PatternTerm& term = state.term();
503
504         Jump atBegin;
505         JumpList matchDest;
506         if (!term.inputPosition)
507             atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
508         readCharacter(state.inputOffset() - 1, character);
509         matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
510         if (!term.inputPosition)
511             atBegin.link(this);
512
513         // We fall through to here if the last character was not a wordchar.
514         JumpList nonWordCharThenWordChar;
515         JumpList nonWordCharThenNonWordChar;
516         if (term.invertOrCapture) {
517             matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
518             nonWordCharThenWordChar.append(jump());
519         } else {
520             matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
521             nonWordCharThenNonWordChar.append(jump());
522         }
523         state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
524
525         // We jump here if the last character was a wordchar.
526         matchDest.link(this);
527         JumpList wordCharThenWordChar;
528         JumpList wordCharThenNonWordChar;
529         if (term.invertOrCapture) {
530             matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
531             wordCharThenWordChar.append(jump());
532         } else {
533             matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
534             // This can fall-though!
535         }
536
537         state.jumpToBacktrack(wordCharThenWordChar, this);
538         
539         nonWordCharThenWordChar.link(this);
540         wordCharThenNonWordChar.link(this);
541     }
542
543     void generatePatternCharacterSingle(TermGenerationState& state)
544     {
545         const RegisterID character = regT0;
546         UChar ch = state.term().patternCharacter;
547
548         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
549             readCharacter(state.inputOffset(), character);
550             or32(Imm32(32), character);
551             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
552         } else {
553             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
554             state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
555         }
556     }
557
558     void generatePatternCharacterPair(TermGenerationState& state)
559     {
560         const RegisterID character = regT0;
561         UChar ch1 = state.term().patternCharacter;
562         UChar ch2 = state.lookaheadTerm().patternCharacter;
563
564         int mask = 0;
565         int chPair = ch1 | (ch2 << 16);
566         
567         if (m_pattern.m_ignoreCase) {
568             if (isASCIIAlpha(ch1))
569                 mask |= 32;
570             if (isASCIIAlpha(ch2))
571                 mask |= 32 << 16;
572         }
573
574         if (mask) {
575             load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
576             or32(Imm32(mask), character);
577             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
578         } else
579             state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
580     }
581
582     void generatePatternCharacterFixed(TermGenerationState& state)
583     {
584         const RegisterID character = regT0;
585         const RegisterID countRegister = regT1;
586         PatternTerm& term = state.term();
587         UChar ch = term.patternCharacter;
588
589         move(index, countRegister);
590         sub32(Imm32(term.quantityCount), countRegister);
591
592         Label loop(this);
593         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
594             load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
595             or32(Imm32(32), character);
596             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
597         } else {
598             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
599             state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
600         }
601         add32(Imm32(1), countRegister);
602         branch32(NotEqual, countRegister, index).linkTo(loop, this);
603     }
604
605     void generatePatternCharacterGreedy(TermGenerationState& state)
606     {
607         const RegisterID character = regT0;
608         const RegisterID countRegister = regT1;
609         PatternTerm& term = state.term();
610         UChar ch = term.patternCharacter;
611     
612         move(Imm32(0), countRegister);
613
614         JumpList failures;
615         Label loop(this);
616         failures.append(atEndOfInput());
617         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
618             readCharacter(state.inputOffset(), character);
619             or32(Imm32(32), character);
620             failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
621         } else {
622             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
623             failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
624         }
625
626         add32(Imm32(1), countRegister);
627         add32(Imm32(1), index);
628         if (term.quantityCount != 0xffffffff) {
629             branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
630             failures.append(jump());
631         } else
632             jump(loop);
633
634         Label backtrackBegin(this);
635         loadFromFrame(term.frameLocation, countRegister);
636         state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
637         sub32(Imm32(1), countRegister);
638         sub32(Imm32(1), index);
639
640         failures.link(this);
641
642         storeToFrame(countRegister, term.frameLocation);
643
644         state.setBacktrackGenerated(backtrackBegin);
645     }
646
647     void generatePatternCharacterNonGreedy(TermGenerationState& state)
648     {
649         const RegisterID character = regT0;
650         const RegisterID countRegister = regT1;
651         PatternTerm& term = state.term();
652         UChar ch = term.patternCharacter;
653     
654         move(Imm32(0), countRegister);
655
656         Jump firstTimeDoNothing = jump();
657
658         Label hardFail(this);
659         sub32(countRegister, index);
660         state.jumpToBacktrack(jump(), this);
661
662         Label backtrackBegin(this);
663         loadFromFrame(term.frameLocation, countRegister);
664
665         atEndOfInput().linkTo(hardFail, this);
666         if (term.quantityCount != 0xffffffff)
667             branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
668         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
669             readCharacter(state.inputOffset(), character);
670             or32(Imm32(32), character);
671             branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
672         } else {
673             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
674             jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
675         }
676
677         add32(Imm32(1), countRegister);
678         add32(Imm32(1), index);
679
680         firstTimeDoNothing.link(this);
681         storeToFrame(countRegister, term.frameLocation);
682
683         state.setBacktrackGenerated(backtrackBegin);
684     }
685
686     void generateCharacterClassSingle(TermGenerationState& state)
687     {
688         const RegisterID character = regT0;
689         PatternTerm& term = state.term();
690
691         JumpList matchDest;
692         readCharacter(state.inputOffset(), character);
693         matchCharacterClass(character, matchDest, term.characterClass);
694
695         if (term.invertOrCapture)
696             state.jumpToBacktrack(matchDest, this);
697         else {
698             state.jumpToBacktrack(jump(), this);
699             matchDest.link(this);
700         }
701     }
702
703     void generateCharacterClassFixed(TermGenerationState& state)
704     {
705         const RegisterID character = regT0;
706         const RegisterID countRegister = regT1;
707         PatternTerm& term = state.term();
708
709         move(index, countRegister);
710         sub32(Imm32(term.quantityCount), countRegister);
711
712         Label loop(this);
713         JumpList matchDest;
714         load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
715         matchCharacterClass(character, matchDest, term.characterClass);
716
717         if (term.invertOrCapture)
718             state.jumpToBacktrack(matchDest, this);
719         else {
720             state.jumpToBacktrack(jump(), this);
721             matchDest.link(this);
722         }
723
724         add32(Imm32(1), countRegister);
725         branch32(NotEqual, countRegister, index).linkTo(loop, this);
726     }
727
728     void generateCharacterClassGreedy(TermGenerationState& state)
729     {
730         const RegisterID character = regT0;
731         const RegisterID countRegister = regT1;
732         PatternTerm& term = state.term();
733     
734         move(Imm32(0), countRegister);
735
736         JumpList failures;
737         Label loop(this);
738         failures.append(atEndOfInput());
739
740         if (term.invertOrCapture) {
741             readCharacter(state.inputOffset(), character);
742             matchCharacterClass(character, failures, term.characterClass);
743         } else {
744             JumpList matchDest;
745             readCharacter(state.inputOffset(), character);
746             matchCharacterClass(character, matchDest, term.characterClass);
747             failures.append(jump());
748             matchDest.link(this);
749         }
750
751         add32(Imm32(1), countRegister);
752         add32(Imm32(1), index);
753         if (term.quantityCount != 0xffffffff) {
754             branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
755             failures.append(jump());
756         } else
757             jump(loop);
758
759         Label backtrackBegin(this);
760         loadFromFrame(term.frameLocation, countRegister);
761         state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
762         sub32(Imm32(1), countRegister);
763         sub32(Imm32(1), index);
764
765         failures.link(this);
766
767         storeToFrame(countRegister, term.frameLocation);
768
769         state.setBacktrackGenerated(backtrackBegin);
770     }
771
772     void generateCharacterClassNonGreedy(TermGenerationState& state)
773     {
774         const RegisterID character = regT0;
775         const RegisterID countRegister = regT1;
776         PatternTerm& term = state.term();
777     
778         move(Imm32(0), countRegister);
779
780         Jump firstTimeDoNothing = jump();
781
782         Label hardFail(this);
783         sub32(countRegister, index);
784         state.jumpToBacktrack(jump(), this);
785
786         Label backtrackBegin(this);
787         loadFromFrame(term.frameLocation, countRegister);
788
789         atEndOfInput().linkTo(hardFail, this);
790         branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
791
792         JumpList matchDest;
793         readCharacter(state.inputOffset(), character);
794         matchCharacterClass(character, matchDest, term.characterClass);
795
796         if (term.invertOrCapture)
797             matchDest.linkTo(hardFail, this);
798         else {
799             jump(hardFail);
800             matchDest.link(this);
801         }
802
803         add32(Imm32(1), countRegister);
804         add32(Imm32(1), index);
805
806         firstTimeDoNothing.link(this);
807         storeToFrame(countRegister, term.frameLocation);
808
809         state.setBacktrackGenerated(backtrackBegin);
810     }
811
812     void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
813     {
814         ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
815         ASSERT(parenthesesTerm.quantityCount == 1);
816     
817         PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
818         unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
819
820         if (disjunction->m_alternatives.size() == 1) {
821             state.resetAlternative();
822             ASSERT(state.alternativeValid());
823             PatternAlternative* alternative = state.alternative();
824             optimizeAlternative(alternative);
825
826             int countToCheck = alternative->m_minimumSize - preCheckedCount;
827             if (countToCheck) {
828                 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
829
830                 // FIXME: This is quite horrible.  The call to 'plantJumpToBacktrackIfExists'
831                 // will be forced to always trampoline into here, just to decrement the index.
832                 // Ick. 
833                 Jump skip = jump();
834
835                 Label backtrackBegin(this);
836                 sub32(Imm32(countToCheck), index);
837                 state.addBacktrackJump(jump());
838                 
839                 skip.link(this);
840
841                 state.setBacktrackGenerated(backtrackBegin);
842
843                 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
844                 state.checkedTotal += countToCheck;
845             }
846
847             for (state.resetTerm(); state.termValid(); state.nextTerm())
848                 generateTerm(state);
849
850             state.checkedTotal -= countToCheck;
851         } else {
852             JumpList successes;
853
854             for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
855
856                 PatternAlternative* alternative = state.alternative();
857                 optimizeAlternative(alternative);
858
859                 ASSERT(alternative->m_minimumSize >= preCheckedCount);
860                 int countToCheck = alternative->m_minimumSize - preCheckedCount;
861                 if (countToCheck) {
862                     state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
863                     state.checkedTotal += countToCheck;
864                 }
865
866                 for (state.resetTerm(); state.termValid(); state.nextTerm())
867                     generateTerm(state);
868
869                 // Matched an alternative.
870                 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
871                 successes.append(jump());
872
873                 // Alternative did not match.
874                 Label backtrackLocation(this);
875                 
876                 // Can we backtrack the alternative? - if so, do so.  If not, just fall through to the next one.
877                 state.plantJumpToBacktrackIfExists(this);
878                 
879                 state.linkAlternativeBacktracks(this);
880
881                 if (countToCheck) {
882                     sub32(Imm32(countToCheck), index);
883                     state.checkedTotal -= countToCheck;
884                 }
885
886                 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
887             }
888             // We fall through to here when the last alternative fails.
889             // Add a backtrack out of here for the parenthese handling code to link up.
890             state.addBacktrackJump(jump());
891
892             // Generate a trampoline for the parens code to backtrack to, to retry the
893             // next alternative.
894             state.setBacktrackGenerated(label());
895             loadFromFrameAndJump(alternativeFrameLocation);
896
897             // FIXME: both of the above hooks are a little inefficient, in that you
898             // may end up trampolining here, just to trampoline back out to the
899             // parentheses code, or vice versa.  We can probably eliminate a jump
900             // by restructuring, but coding this way for now for simplicity during
901             // development.
902
903             successes.link(this);
904         }
905     }
906
907     void generateParenthesesSingle(TermGenerationState& state)
908     {
909         const RegisterID indexTemporary = regT0;
910         PatternTerm& term = state.term();
911         PatternDisjunction* disjunction = term.parentheses.disjunction;
912         ASSERT(term.quantityCount == 1);
913
914         if (!term.parentheses.isCopy) {
915             m_shouldFallBack = true;
916             return;
917         }
918
919         unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
920
921         unsigned parenthesesFrameLocation = term.frameLocation;
922         unsigned alternativeFrameLocation = parenthesesFrameLocation;
923         if (term.quantityType != QuantifierFixedCount)
924             alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
925
926         // optimized case - no capture & no quantifier can be handled in a light-weight manner.
927         if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
928             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
929             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
930             // this expects that any backtracks back out of the parentheses will be in the
931             // parenthesesState's backTrackJumps vector, and that if they need backtracking
932             // they will have set an entry point on the parenthesesState's backtrackLabel.
933             state.propagateBacktrackingFrom(parenthesesState, this);
934         } else {
935             Jump nonGreedySkipParentheses;
936             Label nonGreedyTryParentheses;
937             if (term.quantityType == QuantifierGreedy)
938                 storeToFrame(Imm32(1), parenthesesFrameLocation);
939             else if (term.quantityType == QuantifierNonGreedy) {
940                 storeToFrame(Imm32(0), parenthesesFrameLocation);
941                 nonGreedySkipParentheses = jump();
942                 nonGreedyTryParentheses = label();
943                 storeToFrame(Imm32(1), parenthesesFrameLocation);
944             }
945
946             // store the match start index
947             if (term.invertOrCapture) {
948                 int inputOffset = state.inputOffset() - preCheckedCount;
949                 if (inputOffset) {
950                     move(index, indexTemporary);
951                     add32(Imm32(inputOffset), indexTemporary);
952                     store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
953                 } else
954                     store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
955             }
956
957             // generate the body of the parentheses
958             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
959             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
960
961             // store the match end index
962             if (term.invertOrCapture) {
963                 int inputOffset = state.inputOffset();
964                 if (inputOffset) {
965                     move(index, indexTemporary);
966                     add32(Imm32(state.inputOffset()), indexTemporary);
967                     store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
968                 } else
969                     store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
970             }
971             Jump success = jump();
972
973             // A failure AFTER the parens jumps here
974             Label backtrackFromAfterParens(this);
975
976             if (term.quantityType == QuantifierGreedy) {
977                 // If this is zero we have now tested with both with and without the parens.
978                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
979                 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
980             } else if (term.quantityType == QuantifierNonGreedy) {
981                 // If this is zero we have now tested with both with and without the parens.
982                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
983                 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
984             }
985
986             parenthesesState.plantJumpToBacktrackIfExists(this);
987             // A failure WITHIN the parens jumps here
988             parenthesesState.linkAlternativeBacktracks(this);
989             if (term.invertOrCapture) {
990                 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
991                 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
992             }
993
994             if (term.quantityType == QuantifierGreedy)
995                 storeToFrame(Imm32(0), parenthesesFrameLocation);
996             else
997                 state.jumpToBacktrack(jump(), this);
998
999             state.setBacktrackGenerated(backtrackFromAfterParens);
1000             if (term.quantityType == QuantifierNonGreedy)
1001                 nonGreedySkipParentheses.link(this);
1002             success.link(this);
1003         }
1004     }
1005
1006     void generateParenthesesGreedyNoBacktrack(TermGenerationState& state)
1007     {
1008         PatternTerm& parenthesesTerm = state.term();
1009         PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
1010         ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern);
1011         ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle.
1012
1013         // Capturing not yet implemented!
1014         if (parenthesesTerm.invertOrCapture) {
1015             m_shouldFallBack = true;
1016             return;
1017         }
1018
1019         // Quantification limit not yet implemented!
1020         if (parenthesesTerm.quantityCount != 0xffffffff) {
1021             m_shouldFallBack = true;
1022             return;
1023         }
1024
1025         TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1026
1027         Label matchAgain(this);
1028         for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) {
1029
1030             PatternAlternative* alternative = parenthesesState.alternative();
1031             optimizeAlternative(alternative);
1032
1033             int countToCheck = alternative->m_minimumSize;
1034             if (countToCheck) {
1035                 parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
1036                 parenthesesState.checkedTotal += countToCheck;
1037             }
1038
1039             for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm())
1040                 generateTerm(parenthesesState);
1041
1042             // If we get here, we matched! Limit not yet supported, so just try to match more!
1043             jump(matchAgain);
1044             
1045             parenthesesState.linkAlternativeBacktracks(this);
1046             // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
1047
1048             if (countToCheck) {
1049                 sub32(Imm32(countToCheck), index);
1050                 parenthesesState.checkedTotal -= countToCheck;
1051             }
1052         }
1053
1054         // If the last alternative falls through to here, we have a failed match...
1055         // Which means that we match whatever we have matched up to this point (even if nothing).
1056     }
1057
1058     void generateParentheticalAssertion(TermGenerationState& state)
1059     {
1060         PatternTerm& term = state.term();
1061         PatternDisjunction* disjunction = term.parentheses.disjunction;
1062         ASSERT(term.quantityCount == 1);
1063         ASSERT(term.quantityType == QuantifierFixedCount);
1064
1065         unsigned parenthesesFrameLocation = term.frameLocation;
1066         unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
1067
1068         int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
1069
1070         if (term.invertOrCapture) {
1071             // Inverted case
1072             storeToFrame(index, parenthesesFrameLocation);
1073
1074             state.checkedTotal -= countCheckedAfterAssertion;
1075             if (countCheckedAfterAssertion)
1076                 sub32(Imm32(countCheckedAfterAssertion), index);
1077
1078             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1079             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1080             // Success! - which means - Fail!
1081             loadFromFrame(parenthesesFrameLocation, index);
1082             state.jumpToBacktrack(jump(), this);
1083
1084             // And fail means success.
1085             parenthesesState.linkAlternativeBacktracks(this);
1086             loadFromFrame(parenthesesFrameLocation, index);
1087
1088             state.checkedTotal += countCheckedAfterAssertion;
1089         } else {
1090             // Normal case
1091             storeToFrame(index, parenthesesFrameLocation);
1092
1093             state.checkedTotal -= countCheckedAfterAssertion;
1094             if (countCheckedAfterAssertion)
1095                 sub32(Imm32(countCheckedAfterAssertion), index);
1096
1097             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1098             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1099             // Success! - which means - Success!
1100             loadFromFrame(parenthesesFrameLocation, index);
1101             Jump success = jump();
1102
1103             parenthesesState.linkAlternativeBacktracks(this);
1104             loadFromFrame(parenthesesFrameLocation, index);
1105             state.jumpToBacktrack(jump(), this);
1106
1107             success.link(this);
1108
1109             state.checkedTotal += countCheckedAfterAssertion;
1110         }
1111     }
1112
1113     void generateTerm(TermGenerationState& state)
1114     {
1115         PatternTerm& term = state.term();
1116
1117         switch (term.type) {
1118         case PatternTerm::TypeAssertionBOL:
1119             generateAssertionBOL(state);
1120             break;
1121         
1122         case PatternTerm::TypeAssertionEOL:
1123             generateAssertionEOL(state);
1124             break;
1125         
1126         case PatternTerm::TypeAssertionWordBoundary:
1127             generateAssertionWordBoundary(state);
1128             break;
1129         
1130         case PatternTerm::TypePatternCharacter:
1131             switch (term.quantityType) {
1132             case QuantifierFixedCount:
1133                 if (term.quantityCount == 1) {
1134                     if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
1135                         generatePatternCharacterPair(state);
1136                         state.nextTerm();
1137                     } else
1138                         generatePatternCharacterSingle(state);
1139                 } else
1140                     generatePatternCharacterFixed(state);
1141                 break;
1142             case QuantifierGreedy:
1143                 generatePatternCharacterGreedy(state);
1144                 break;
1145             case QuantifierNonGreedy:
1146                 generatePatternCharacterNonGreedy(state);
1147                 break;
1148             }
1149             break;
1150
1151         case PatternTerm::TypeCharacterClass:
1152             switch (term.quantityType) {
1153             case QuantifierFixedCount:
1154                 if (term.quantityCount == 1)
1155                     generateCharacterClassSingle(state);
1156                 else
1157                     generateCharacterClassFixed(state);
1158                 break;
1159             case QuantifierGreedy:
1160                 generateCharacterClassGreedy(state);
1161                 break;
1162             case QuantifierNonGreedy:
1163                 generateCharacterClassNonGreedy(state);
1164                 break;
1165             }
1166             break;
1167
1168         case PatternTerm::TypeBackReference:
1169             m_shouldFallBack = true;
1170             break;
1171
1172         case PatternTerm::TypeForwardReference:
1173             break;
1174
1175         case PatternTerm::TypeParenthesesSubpattern:
1176             if (term.quantityCount == 1) {
1177                 generateParenthesesSingle(state);
1178                 break;
1179             } else if (state.isLastTerm() && state.isMainDisjunction()) { // Is this is the last term of the main disjunction?
1180                 // If this has a greedy quantifier, then it will never need to backtrack!
1181                 if (term.quantityType == QuantifierGreedy) {
1182                     generateParenthesesGreedyNoBacktrack(state);
1183                     break;
1184                 }
1185             }
1186             m_shouldFallBack = true;
1187             break;
1188
1189         case PatternTerm::TypeParentheticalAssertion:
1190             generateParentheticalAssertion(state);
1191             break;
1192         }
1193     }
1194
1195     void generateDisjunction(PatternDisjunction* disjunction)
1196     {
1197         TermGenerationState state(disjunction, 0);
1198         state.resetAlternative();
1199
1200         // Plant a check to see if there is sufficient input available to run the first alternative.
1201         // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1202         // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1203         // skipped this check.
1204
1205         Label firstAlternative(this);
1206
1207         // check availability for the next alternative
1208         int countCheckedForCurrentAlternative = 0;
1209         int countToCheckForFirstAlternative = 0;
1210         bool hasShorterAlternatives = false;
1211         JumpList notEnoughInputForPreviousAlternative;
1212
1213         if (state.alternativeValid()) {
1214             PatternAlternative* alternative = state.alternative();
1215             countToCheckForFirstAlternative = alternative->m_minimumSize;
1216             state.checkedTotal += countToCheckForFirstAlternative;
1217             if (countToCheckForFirstAlternative)
1218                 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
1219             countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
1220         }
1221
1222         Label firstAlternativeInputChecked(this);
1223
1224         while (state.alternativeValid()) {
1225             // Track whether any alternatives are shorter than the first one.
1226             hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
1227
1228             PatternAlternative* alternative = state.alternative();
1229             optimizeAlternative(alternative);
1230
1231             for (state.resetTerm(); state.termValid(); state.nextTerm())
1232                 generateTerm(state);
1233
1234             // If we get here, the alternative matched.
1235             if (m_pattern.m_body->m_callFrameSize)
1236                 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1237             
1238             ASSERT(index != returnRegister);
1239             if (m_pattern.m_body->m_hasFixedSize) {
1240                 move(index, returnRegister);
1241                 if (alternative->m_minimumSize)
1242                     sub32(Imm32(alternative->m_minimumSize), returnRegister);
1243             } else
1244                 pop(returnRegister);
1245             store32(index, Address(output, 4));
1246             store32(returnRegister, output);
1247
1248             generateReturn();
1249
1250             state.nextAlternative();
1251
1252             // if there are any more alternatives, plant the check for input before looping.
1253             if (state.alternativeValid()) {
1254                 PatternAlternative* nextAlternative = state.alternative();
1255                 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
1256
1257                 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
1258                     // If we get here, there the last input checked failed.
1259                     notEnoughInputForPreviousAlternative.link(this);
1260
1261                     // Check if sufficent input available to run the next alternative 
1262                     notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1263                     // We are now in the correct state to enter the next alternative; this add is only required
1264                     // to mirror and revert operation of the sub32, just below.
1265                     add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1266
1267                     // If we get here, there the last input checked passed.
1268                     state.linkAlternativeBacktracks(this);
1269                     // No need to check if we can run the next alternative, since it is shorter -
1270                     // just update index.
1271                     sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1272                 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
1273                     // If we get here, there the last input checked failed.
1274                     // If there is insufficient input to run the current alternative, and the next alternative is longer,
1275                     // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1276                     // we had checked.
1277                     notEnoughInputForPreviousAlternative.link(this);
1278                     add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
1279                     notEnoughInputForPreviousAlternative.append(jump());
1280
1281                     // The next alternative is longer than the current one; check the difference.
1282                     state.linkAlternativeBacktracks(this);
1283                     notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1284                 } else { // CASE 3: Both alternatives are the same length.
1285                     ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
1286
1287                     // If the next alterative is the same length as this one, then no need to check the input -
1288                     // if there was sufficent input to run the current alternative then there is sufficient
1289                     // input to run the next one; if not, there isn't.
1290                     state.linkAlternativeBacktracks(this);
1291                 }
1292
1293                 state.checkedTotal -= countCheckedForCurrentAlternative;
1294                 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
1295                 state.checkedTotal += countCheckedForCurrentAlternative;
1296             }
1297         }
1298         
1299         // If we get here, all Alternatives failed...
1300
1301         state.checkedTotal -= countCheckedForCurrentAlternative;
1302
1303         // How much more input need there be to be able to retry from the first alternative?
1304         // examples:
1305         //   /yarr_jit/ or /wrec|pcre/
1306         //     In these examples we need check for one more input before looping.
1307         //   /yarr_jit|pcre/
1308         //     In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1309         //     being four longer than the last alternative checked, and another +1 to effectively move
1310         //     the start position along by one).
1311         //   /yarr|rules/ or /wrec|notsomuch/
1312         //     In these examples, provided that there was sufficient input to have just been matching for
1313         //     the second alternative we can loop without checking for available input (since the second
1314         //     alternative is longer than the first).  In the latter example we need to decrement index
1315         //     (by 4) so the start position is only progressed by 1 from the last iteration.
1316         int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
1317
1318         // First, deal with the cases where there was sufficient input to try the last alternative.
1319         if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
1320             state.linkAlternativeBacktracks(this);
1321         else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
1322             state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
1323         else { // no need to check the input, but we do have some bookkeeping to do first.
1324             state.linkAlternativeBacktracks(this);
1325
1326             // Where necessary update our preserved start position.
1327             if (!m_pattern.m_body->m_hasFixedSize) {
1328                 move(index, regT0);
1329                 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1330                 poke(regT0, m_pattern.m_body->m_callFrameSize);
1331             }
1332
1333             // Update index if necessary, and loop (without checking).
1334             if (incrementForNextIter)
1335                 add32(Imm32(incrementForNextIter), index);
1336             jump().linkTo(firstAlternativeInputChecked, this);
1337         }
1338
1339         notEnoughInputForPreviousAlternative.link(this);
1340         // Update our idea of the start position, if we're tracking this.
1341         if (!m_pattern.m_body->m_hasFixedSize) {
1342             if (countCheckedForCurrentAlternative - 1) {
1343                 move(index, regT0);
1344                 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1345                 poke(regT0, m_pattern.m_body->m_callFrameSize);
1346             } else
1347                 poke(index, m_pattern.m_body->m_callFrameSize);
1348         }
1349         // Check if there is sufficent input to run the first alternative again.
1350         jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
1351         // No - insufficent input to run the first alteranative, are there any other alternatives we
1352         // might need to check?  If so, the last check will have left the index incremented by
1353         // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1354         // LESS input is available, to have the effect of just progressing the start position by 1
1355         // from the last iteration.  If this check passes we can just jump up to the check associated
1356         // with the first alternative in the loop.  This is a bit sad, since we'll end up trying the
1357         // first alternative again, and this check will fail (otherwise the check planted just above
1358         // here would have passed).  This is a bit sad, however it saves trying to do something more
1359         // complex here in compilation, and in the common case we should end up coallescing the checks.
1360         //
1361         // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1362         // of the minimum-alternative-lengths.  E.g. if I have two alternatives of length 200 and 150,
1363         // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1364         // is sufficient input to run either alternative (constantly failing).  If there had been only
1365         // one alternative, or if the shorter alternative had come first, we would have terminated
1366         // immediately. :-/
1367         if (hasShorterAlternatives)
1368             jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
1369         // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1370         // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 
1371         // but since we're about to return a failure this doesn't really matter!)
1372
1373         unsigned frameSize = m_pattern.m_body->m_callFrameSize;
1374         if (!m_pattern.m_body->m_hasFixedSize)
1375             ++frameSize;
1376         if (frameSize)
1377             addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
1378
1379         move(Imm32(-1), returnRegister);
1380
1381         generateReturn();
1382     }
1383
1384     void generateEnter()
1385     {
1386 #if CPU(X86_64)
1387         push(X86Registers::ebp);
1388         move(stackPointerRegister, X86Registers::ebp);
1389         push(X86Registers::ebx);
1390 #elif CPU(X86)
1391         push(X86Registers::ebp);
1392         move(stackPointerRegister, X86Registers::ebp);
1393         // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1394         push(X86Registers::ebx);
1395         push(X86Registers::edi);
1396         push(X86Registers::esi);
1397         // load output into edi (2 = saved ebp + return address).
1398     #if COMPILER(MSVC)
1399         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
1400         loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
1401         loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
1402         loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
1403     #else
1404         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
1405     #endif
1406 #elif CPU(ARM)
1407         push(ARMRegisters::r4);
1408         push(ARMRegisters::r5);
1409         push(ARMRegisters::r6);
1410         move(ARMRegisters::r3, output);
1411 #elif CPU(MIPS)
1412         // Do nothing.
1413 #endif
1414     }
1415
1416     void generateReturn()
1417     {
1418 #if CPU(X86_64)
1419         pop(X86Registers::ebx);
1420         pop(X86Registers::ebp);
1421 #elif CPU(X86)
1422         pop(X86Registers::esi);
1423         pop(X86Registers::edi);
1424         pop(X86Registers::ebx);
1425         pop(X86Registers::ebp);
1426 #elif CPU(ARM)
1427         pop(ARMRegisters::r6);
1428         pop(ARMRegisters::r5);
1429         pop(ARMRegisters::r4);
1430 #elif CPU(MIPS)
1431         // Do nothing
1432 #endif
1433         ret();
1434     }
1435
1436 public:
1437     RegexGenerator(RegexPattern& pattern)
1438         : m_pattern(pattern)
1439         , m_shouldFallBack(false)
1440     {
1441     }
1442
1443     void generate()
1444     {
1445         generateEnter();
1446
1447         // TODO: do I really want this on the stack?
1448         if (!m_pattern.m_body->m_hasFixedSize)
1449             push(index);
1450
1451         if (m_pattern.m_body->m_callFrameSize)
1452             subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1453
1454         generateDisjunction(m_pattern.m_body);
1455     }
1456
1457     void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
1458     {
1459         generate();
1460
1461         LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
1462
1463         for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
1464             patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
1465
1466         jitObject.set(patchBuffer.finalizeCode());
1467     }
1468
1469     bool shouldFallBack()
1470     {
1471         return m_shouldFallBack;
1472     }
1473
1474 private:
1475     RegexPattern& m_pattern;
1476     bool m_shouldFallBack;
1477     Vector<AlternativeBacktrackRecord> m_backtrackRecords;
1478 };
1479
1480 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1481 {
1482     RegexPattern pattern(ignoreCase, multiline);
1483     if ((error = compileRegex(patternString, pattern)))
1484         return;
1485     numSubpatterns = pattern.m_numSubpatterns;
1486
1487     if (!pattern.m_containsBackreferences) {
1488         RegexGenerator generator(pattern);
1489         generator.compile(globalData, jitObject);
1490         if (!generator.shouldFallBack())
1491             return;
1492     }
1493
1494     JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
1495     JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
1496     jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
1497 }
1498
1499 }}
1500
1501 #endif
1502
1503
1504
1505
1506