Temporarily rolling out r60267, I appear to have hoesed perf at the last minute....
[WebKit-https.git] / JavaScriptCore / yarr / RegexJIT.cpp
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27 #include "RegexJIT.h"
28
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegexCompiler.h"
34
35 #include "pcre.h" // temporary, remove when fallback is removed.
36
37 #if ENABLE(YARR_JIT)
38
39 using namespace WTF;
40
41 namespace JSC { namespace Yarr {
42
43 class RegexGenerator : private MacroAssembler {
44     friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
45
46 #if CPU(ARM)
47     static const RegisterID input = ARMRegisters::r0;
48     static const RegisterID index = ARMRegisters::r1;
49     static const RegisterID length = ARMRegisters::r2;
50     static const RegisterID output = ARMRegisters::r4;
51
52     static const RegisterID regT0 = ARMRegisters::r5;
53     static const RegisterID regT1 = ARMRegisters::r6;
54
55     static const RegisterID returnRegister = ARMRegisters::r0;
56 #elif CPU(MIPS)
57     static const RegisterID input = MIPSRegisters::a0;
58     static const RegisterID index = MIPSRegisters::a1;
59     static const RegisterID length = MIPSRegisters::a2;
60     static const RegisterID output = MIPSRegisters::a3;
61
62     static const RegisterID regT0 = MIPSRegisters::t4;
63     static const RegisterID regT1 = MIPSRegisters::t5;
64
65     static const RegisterID returnRegister = MIPSRegisters::v0;
66 #elif CPU(X86)
67     static const RegisterID input = X86Registers::eax;
68     static const RegisterID index = X86Registers::edx;
69     static const RegisterID length = X86Registers::ecx;
70     static const RegisterID output = X86Registers::edi;
71
72     static const RegisterID regT0 = X86Registers::ebx;
73     static const RegisterID regT1 = X86Registers::esi;
74
75     static const RegisterID returnRegister = X86Registers::eax;
76 #elif CPU(X86_64)
77     static const RegisterID input = X86Registers::edi;
78     static const RegisterID index = X86Registers::esi;
79     static const RegisterID length = X86Registers::edx;
80     static const RegisterID output = X86Registers::ecx;
81
82     static const RegisterID regT0 = X86Registers::eax;
83     static const RegisterID regT1 = X86Registers::ebx;
84
85     static const RegisterID returnRegister = X86Registers::eax;
86 #endif
87
88     void optimizeAlternative(PatternAlternative* alternative)
89     {
90         if (!alternative->m_terms.size())
91             return;
92
93         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
94             PatternTerm& term = alternative->m_terms[i];
95             PatternTerm& nextTerm = alternative->m_terms[i + 1];
96
97             if ((term.type == PatternTerm::TypeCharacterClass)
98                 && (term.quantityType == QuantifierFixedCount)
99                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
100                 && (nextTerm.quantityType == QuantifierFixedCount)) {
101                 PatternTerm termCopy = term;
102                 alternative->m_terms[i] = nextTerm;
103                 alternative->m_terms[i + 1] = termCopy;
104             }
105         }
106     }
107
108     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
109     {
110         do {
111             // pick which range we're going to generate
112             int which = count >> 1;
113             char lo = ranges[which].begin;
114             char hi = ranges[which].end;
115             
116             // check if there are any ranges or matches below lo.  If not, just jl to failure -
117             // if there is anything else to check, check that first, if it falls through jmp to failure.
118             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
119                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
120                 
121                 // generate code for all ranges before this one
122                 if (which)
123                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
124                 
125                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
126                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
127                     ++*matchIndex;
128                 }
129                 failures.append(jump());
130
131                 loOrAbove.link(this);
132             } else if (which) {
133                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
134
135                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
136                 failures.append(jump());
137
138                 loOrAbove.link(this);
139             } else
140                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
141
142             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
143                 ++*matchIndex;
144
145             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
146             // fall through to here, the value is above hi.
147
148             // shuffle along & loop around if there are any more matches to handle.
149             unsigned next = which + 1;
150             ranges += next;
151             count -= next;
152         } while (count);
153     }
154
155     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
156     {
157         if (charClass->m_table) {
158             ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
159             matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));   
160             return;
161         }
162         Jump unicodeFail;
163         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
164             Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
165         
166             if (charClass->m_matchesUnicode.size()) {
167                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
168                     UChar ch = charClass->m_matchesUnicode[i];
169                     matchDest.append(branch32(Equal, character, Imm32(ch)));
170                 }
171             }
172             
173             if (charClass->m_rangesUnicode.size()) {
174                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
175                     UChar lo = charClass->m_rangesUnicode[i].begin;
176                     UChar hi = charClass->m_rangesUnicode[i].end;
177                     
178                     Jump below = branch32(LessThan, character, Imm32(lo));
179                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
180                     below.link(this);
181                 }
182             }
183
184             unicodeFail = jump();
185             isAscii.link(this);
186         }
187
188         if (charClass->m_ranges.size()) {
189             unsigned matchIndex = 0;
190             JumpList failures; 
191             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
192             while (matchIndex < charClass->m_matches.size())
193                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
194
195             failures.link(this);
196         } else if (charClass->m_matches.size()) {
197             // optimization: gather 'a','A' etc back together, can mask & test once.
198             Vector<char> matchesAZaz;
199
200             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
201                 char ch = charClass->m_matches[i];
202                 if (m_pattern.m_ignoreCase) {
203                     if (isASCIILower(ch)) {
204                         matchesAZaz.append(ch);
205                         continue;
206                     }
207                     if (isASCIIUpper(ch))
208                         continue;
209                 }
210                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
211             }
212
213             if (unsigned countAZaz = matchesAZaz.size()) {
214                 or32(Imm32(32), character);
215                 for (unsigned i = 0; i < countAZaz; ++i)
216                     matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
217             }
218         }
219
220         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
221             unicodeFail.link(this);
222     }
223
224     // Jumps if input not available; will have (incorrectly) incremented already!
225     Jump jumpIfNoAvailableInput(unsigned countToCheck)
226     {
227         add32(Imm32(countToCheck), index);
228         return branch32(Above, index, length);
229     }
230
231     Jump jumpIfAvailableInput(unsigned countToCheck)
232     {
233         add32(Imm32(countToCheck), index);
234         return branch32(BelowOrEqual, index, length);
235     }
236
237     Jump checkInput()
238     {
239         return branch32(BelowOrEqual, index, length);
240     }
241
242     Jump atEndOfInput()
243     {
244         return branch32(Equal, index, length);
245     }
246
247     Jump notAtEndOfInput()
248     {
249         return branch32(NotEqual, index, length);
250     }
251
252     Jump jumpIfCharEquals(UChar ch, int inputPosition)
253     {
254         return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
255     }
256
257     Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
258     {
259         return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
260     }
261
262     void readCharacter(int inputPosition, RegisterID reg)
263     {
264         load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
265     }
266
267     void storeToFrame(RegisterID reg, unsigned frameLocation)
268     {
269         poke(reg, frameLocation);
270     }
271
272     void storeToFrame(Imm32 imm, unsigned frameLocation)
273     {
274         poke(imm, frameLocation);
275     }
276
277     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
278     {
279         return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
280     }
281
282     void loadFromFrame(unsigned frameLocation, RegisterID reg)
283     {
284         peek(reg, frameLocation);
285     }
286
287     void loadFromFrameAndJump(unsigned frameLocation)
288     {
289         jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
290     }
291
292     struct AlternativeBacktrackRecord {
293         DataLabelPtr dataLabel;
294         Label backtrackLocation;
295
296         AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
297             : dataLabel(dataLabel)
298             , backtrackLocation(backtrackLocation)
299         {
300         }
301     };
302
303     struct TermGenerationState {
304         TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
305             : disjunction(disjunction)
306             , checkedTotal(checkedTotal)
307         {
308         }
309
310         void resetAlternative()
311         {
312             isBackTrackGenerated = false;
313             alt = 0;
314         }
315         bool alternativeValid()
316         {
317             return alt < disjunction->m_alternatives.size();
318         }
319         void nextAlternative()
320         {
321             ++alt;
322         }
323         PatternAlternative* alternative()
324         {
325             return disjunction->m_alternatives[alt];
326         }
327
328         void resetTerm()
329         {
330             ASSERT(alternativeValid());
331             t = 0;
332         }
333         bool termValid()
334         {
335             ASSERT(alternativeValid());
336             return t < alternative()->m_terms.size();
337         }
338         void nextTerm()
339         {
340             ASSERT(alternativeValid());
341             ++t;
342         }
343         PatternTerm& term()
344         {
345             ASSERT(alternativeValid());
346             return alternative()->m_terms[t];
347         }
348
349         PatternTerm& lookaheadTerm()
350         {
351             ASSERT(alternativeValid());
352             ASSERT((t + 1) < alternative()->m_terms.size());
353             return alternative()->m_terms[t + 1];
354         }
355         bool isSinglePatternCharacterLookaheadTerm()
356         {
357             ASSERT(alternativeValid());
358             return ((t + 1) < alternative()->m_terms.size())
359                 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
360                 && (lookaheadTerm().quantityType == QuantifierFixedCount)
361                 && (lookaheadTerm().quantityCount == 1);
362         }
363
364         int inputOffset()
365         {
366             return term().inputPosition - checkedTotal;
367         }
368
369         void jumpToBacktrack(Jump jump, MacroAssembler* masm)
370         {
371             if (isBackTrackGenerated)
372                 jump.linkTo(backtrackLabel, masm);
373             else
374                 backTrackJumps.append(jump);
375         }
376         void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
377         {
378             if (isBackTrackGenerated)
379                 jumps.linkTo(backtrackLabel, masm);
380             else
381                 backTrackJumps.append(jumps);
382         }
383         bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
384         {
385             if (isBackTrackGenerated) {
386                 masm->jump(backtrackLabel);
387                 return true;
388             }
389             return false;
390         }
391         void addBacktrackJump(Jump jump)
392         {
393             backTrackJumps.append(jump);
394         }
395         void setBacktrackGenerated(Label label)
396         {
397             isBackTrackGenerated = true;
398             backtrackLabel = label;
399         }
400         void linkAlternativeBacktracks(MacroAssembler* masm)
401         {
402             isBackTrackGenerated = false;
403             backTrackJumps.link(masm);
404         }
405         void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
406         {
407             isBackTrackGenerated = false;
408             backTrackJumps.linkTo(label, masm);
409         }
410         void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
411         {
412             jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
413             if (nestedParenthesesState.isBackTrackGenerated)
414                 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
415         }
416
417         PatternDisjunction* disjunction;
418         int checkedTotal;
419     private:
420         unsigned alt;
421         unsigned t;
422         JumpList backTrackJumps;
423         Label backtrackLabel;
424         bool isBackTrackGenerated;
425     };
426
427     void generateAssertionBOL(TermGenerationState& state)
428     {
429         PatternTerm& term = state.term();
430
431         if (m_pattern.m_multiline) {
432             const RegisterID character = regT0;
433
434             JumpList matchDest;
435             if (!term.inputPosition)
436                 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
437
438             readCharacter(state.inputOffset() - 1, character);
439             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
440             state.jumpToBacktrack(jump(), this);
441
442             matchDest.link(this);
443         } else {
444             // Erk, really should poison out these alternatives early. :-/
445             if (term.inputPosition)
446                 state.jumpToBacktrack(jump(), this);
447             else
448                 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
449         }
450     }
451
452     void generateAssertionEOL(TermGenerationState& state)
453     {
454         PatternTerm& term = state.term();
455
456         if (m_pattern.m_multiline) {
457             const RegisterID character = regT0;
458
459             JumpList matchDest;
460             if (term.inputPosition == state.checkedTotal)
461                 matchDest.append(atEndOfInput());
462
463             readCharacter(state.inputOffset(), character);
464             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
465             state.jumpToBacktrack(jump(), this);
466
467             matchDest.link(this);
468         } else {
469             if (term.inputPosition == state.checkedTotal)
470                 state.jumpToBacktrack(notAtEndOfInput(), this);
471             // Erk, really should poison out these alternatives early. :-/
472             else
473                 state.jumpToBacktrack(jump(), this);
474         }
475     }
476
477     // Also falls though on nextIsNotWordChar.
478     void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
479     {
480         const RegisterID character = regT0;
481         PatternTerm& term = state.term();
482
483         if (term.inputPosition == state.checkedTotal)
484             nextIsNotWordChar.append(atEndOfInput());
485
486         readCharacter(state.inputOffset(), character);
487         matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
488     }
489
490     void generateAssertionWordBoundary(TermGenerationState& state)
491     {
492         const RegisterID character = regT0;
493         PatternTerm& term = state.term();
494
495         Jump atBegin;
496         JumpList matchDest;
497         if (!term.inputPosition)
498             atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
499         readCharacter(state.inputOffset() - 1, character);
500         matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
501         if (!term.inputPosition)
502             atBegin.link(this);
503
504         // We fall through to here if the last character was not a wordchar.
505         JumpList nonWordCharThenWordChar;
506         JumpList nonWordCharThenNonWordChar;
507         if (term.invertOrCapture) {
508             matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
509             nonWordCharThenWordChar.append(jump());
510         } else {
511             matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
512             nonWordCharThenNonWordChar.append(jump());
513         }
514         state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
515
516         // We jump here if the last character was a wordchar.
517         matchDest.link(this);
518         JumpList wordCharThenWordChar;
519         JumpList wordCharThenNonWordChar;
520         if (term.invertOrCapture) {
521             matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
522             wordCharThenWordChar.append(jump());
523         } else {
524             matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
525             // This can fall-though!
526         }
527
528         state.jumpToBacktrack(wordCharThenWordChar, this);
529         
530         nonWordCharThenWordChar.link(this);
531         wordCharThenNonWordChar.link(this);
532     }
533
534     void generatePatternCharacterSingle(TermGenerationState& state)
535     {
536         const RegisterID character = regT0;
537         UChar ch = state.term().patternCharacter;
538
539         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
540             readCharacter(state.inputOffset(), character);
541             or32(Imm32(32), character);
542             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
543         } else {
544             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
545             state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
546         }
547     }
548
549     void generatePatternCharacterPair(TermGenerationState& state)
550     {
551         const RegisterID character = regT0;
552         UChar ch1 = state.term().patternCharacter;
553         UChar ch2 = state.lookaheadTerm().patternCharacter;
554
555         int mask = 0;
556         int chPair = ch1 | (ch2 << 16);
557         
558         if (m_pattern.m_ignoreCase) {
559             if (isASCIIAlpha(ch1))
560                 mask |= 32;
561             if (isASCIIAlpha(ch2))
562                 mask |= 32 << 16;
563         }
564
565         if (mask) {
566             load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
567             or32(Imm32(mask), character);
568             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
569         } else
570             state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
571     }
572
573     void generatePatternCharacterFixed(TermGenerationState& state)
574     {
575         const RegisterID character = regT0;
576         const RegisterID countRegister = regT1;
577         PatternTerm& term = state.term();
578         UChar ch = term.patternCharacter;
579
580         move(index, countRegister);
581         sub32(Imm32(term.quantityCount), countRegister);
582
583         Label loop(this);
584         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
585             load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
586             or32(Imm32(32), character);
587             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
588         } else {
589             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
590             state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
591         }
592         add32(Imm32(1), countRegister);
593         branch32(NotEqual, countRegister, index).linkTo(loop, this);
594     }
595
596     void generatePatternCharacterGreedy(TermGenerationState& state)
597     {
598         const RegisterID character = regT0;
599         const RegisterID countRegister = regT1;
600         PatternTerm& term = state.term();
601         UChar ch = term.patternCharacter;
602     
603         move(Imm32(0), countRegister);
604
605         JumpList failures;
606         Label loop(this);
607         failures.append(atEndOfInput());
608         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
609             readCharacter(state.inputOffset(), character);
610             or32(Imm32(32), character);
611             failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
612         } else {
613             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
614             failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
615         }
616
617         add32(Imm32(1), countRegister);
618         add32(Imm32(1), index);
619         if (term.quantityCount != 0xffffffff) {
620             branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
621             failures.append(jump());
622         } else
623             jump(loop);
624
625         Label backtrackBegin(this);
626         loadFromFrame(term.frameLocation, countRegister);
627         state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
628         sub32(Imm32(1), countRegister);
629         sub32(Imm32(1), index);
630
631         failures.link(this);
632
633         storeToFrame(countRegister, term.frameLocation);
634
635         state.setBacktrackGenerated(backtrackBegin);
636     }
637
638     void generatePatternCharacterNonGreedy(TermGenerationState& state)
639     {
640         const RegisterID character = regT0;
641         const RegisterID countRegister = regT1;
642         PatternTerm& term = state.term();
643         UChar ch = term.patternCharacter;
644     
645         move(Imm32(0), countRegister);
646
647         Jump firstTimeDoNothing = jump();
648
649         Label hardFail(this);
650         sub32(countRegister, index);
651         state.jumpToBacktrack(jump(), this);
652
653         Label backtrackBegin(this);
654         loadFromFrame(term.frameLocation, countRegister);
655
656         atEndOfInput().linkTo(hardFail, this);
657         if (term.quantityCount != 0xffffffff)
658             branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
659         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
660             readCharacter(state.inputOffset(), character);
661             or32(Imm32(32), character);
662             branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
663         } else {
664             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
665             jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
666         }
667
668         add32(Imm32(1), countRegister);
669         add32(Imm32(1), index);
670
671         firstTimeDoNothing.link(this);
672         storeToFrame(countRegister, term.frameLocation);
673
674         state.setBacktrackGenerated(backtrackBegin);
675     }
676
677     void generateCharacterClassSingle(TermGenerationState& state)
678     {
679         const RegisterID character = regT0;
680         PatternTerm& term = state.term();
681
682         JumpList matchDest;
683         readCharacter(state.inputOffset(), character);
684         matchCharacterClass(character, matchDest, term.characterClass);
685
686         if (term.invertOrCapture)
687             state.jumpToBacktrack(matchDest, this);
688         else {
689             state.jumpToBacktrack(jump(), this);
690             matchDest.link(this);
691         }
692     }
693
694     void generateCharacterClassFixed(TermGenerationState& state)
695     {
696         const RegisterID character = regT0;
697         const RegisterID countRegister = regT1;
698         PatternTerm& term = state.term();
699
700         move(index, countRegister);
701         sub32(Imm32(term.quantityCount), countRegister);
702
703         Label loop(this);
704         JumpList matchDest;
705         load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
706         matchCharacterClass(character, matchDest, term.characterClass);
707
708         if (term.invertOrCapture)
709             state.jumpToBacktrack(matchDest, this);
710         else {
711             state.jumpToBacktrack(jump(), this);
712             matchDest.link(this);
713         }
714
715         add32(Imm32(1), countRegister);
716         branch32(NotEqual, countRegister, index).linkTo(loop, this);
717     }
718
719     void generateCharacterClassGreedy(TermGenerationState& state)
720     {
721         const RegisterID character = regT0;
722         const RegisterID countRegister = regT1;
723         PatternTerm& term = state.term();
724     
725         move(Imm32(0), countRegister);
726
727         JumpList failures;
728         Label loop(this);
729         failures.append(atEndOfInput());
730
731         if (term.invertOrCapture) {
732             readCharacter(state.inputOffset(), character);
733             matchCharacterClass(character, failures, term.characterClass);
734         } else {
735             JumpList matchDest;
736             readCharacter(state.inputOffset(), character);
737             matchCharacterClass(character, matchDest, term.characterClass);
738             failures.append(jump());
739             matchDest.link(this);
740         }
741
742         add32(Imm32(1), countRegister);
743         add32(Imm32(1), index);
744         if (term.quantityCount != 0xffffffff) {
745             branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
746             failures.append(jump());
747         } else
748             jump(loop);
749
750         Label backtrackBegin(this);
751         loadFromFrame(term.frameLocation, countRegister);
752         state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
753         sub32(Imm32(1), countRegister);
754         sub32(Imm32(1), index);
755
756         failures.link(this);
757
758         storeToFrame(countRegister, term.frameLocation);
759
760         state.setBacktrackGenerated(backtrackBegin);
761     }
762
763     void generateCharacterClassNonGreedy(TermGenerationState& state)
764     {
765         const RegisterID character = regT0;
766         const RegisterID countRegister = regT1;
767         PatternTerm& term = state.term();
768     
769         move(Imm32(0), countRegister);
770
771         Jump firstTimeDoNothing = jump();
772
773         Label hardFail(this);
774         sub32(countRegister, index);
775         state.jumpToBacktrack(jump(), this);
776
777         Label backtrackBegin(this);
778         loadFromFrame(term.frameLocation, countRegister);
779
780         atEndOfInput().linkTo(hardFail, this);
781         branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
782
783         JumpList matchDest;
784         readCharacter(state.inputOffset(), character);
785         matchCharacterClass(character, matchDest, term.characterClass);
786
787         if (term.invertOrCapture)
788             matchDest.linkTo(hardFail, this);
789         else {
790             jump(hardFail);
791             matchDest.link(this);
792         }
793
794         add32(Imm32(1), countRegister);
795         add32(Imm32(1), index);
796
797         firstTimeDoNothing.link(this);
798         storeToFrame(countRegister, term.frameLocation);
799
800         state.setBacktrackGenerated(backtrackBegin);
801     }
802
803     void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
804     {
805         ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
806         ASSERT(parenthesesTerm.quantityCount == 1);
807     
808         PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
809         unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
810
811         if (disjunction->m_alternatives.size() == 1) {
812             state.resetAlternative();
813             ASSERT(state.alternativeValid());
814             PatternAlternative* alternative = state.alternative();
815             optimizeAlternative(alternative);
816
817             int countToCheck = alternative->m_minimumSize - preCheckedCount;
818             if (countToCheck) {
819                 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
820
821                 // FIXME: This is quite horrible.  The call to 'plantJumpToBacktrackIfExists'
822                 // will be forced to always trampoline into here, just to decrement the index.
823                 // Ick. 
824                 Jump skip = jump();
825
826                 Label backtrackBegin(this);
827                 sub32(Imm32(countToCheck), index);
828                 state.addBacktrackJump(jump());
829                 
830                 skip.link(this);
831
832                 state.setBacktrackGenerated(backtrackBegin);
833
834                 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
835                 state.checkedTotal += countToCheck;
836             }
837
838             for (state.resetTerm(); state.termValid(); state.nextTerm())
839                 generateTerm(state);
840
841             state.checkedTotal -= countToCheck;
842         } else {
843             JumpList successes;
844
845             for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
846
847                 PatternAlternative* alternative = state.alternative();
848                 optimizeAlternative(alternative);
849
850                 ASSERT(alternative->m_minimumSize >= preCheckedCount);
851                 int countToCheck = alternative->m_minimumSize - preCheckedCount;
852                 if (countToCheck) {
853                     state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
854                     state.checkedTotal += countToCheck;
855                 }
856
857                 for (state.resetTerm(); state.termValid(); state.nextTerm())
858                     generateTerm(state);
859
860                 // Matched an alternative.
861                 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
862                 successes.append(jump());
863
864                 // Alternative did not match.
865                 Label backtrackLocation(this);
866                 
867                 // Can we backtrack the alternative? - if so, do so.  If not, just fall through to the next one.
868                 state.plantJumpToBacktrackIfExists(this);
869                 
870                 state.linkAlternativeBacktracks(this);
871
872                 if (countToCheck) {
873                     sub32(Imm32(countToCheck), index);
874                     state.checkedTotal -= countToCheck;
875                 }
876
877                 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
878             }
879             // We fall through to here when the last alternative fails.
880             // Add a backtrack out of here for the parenthese handling code to link up.
881             state.addBacktrackJump(jump());
882
883             // Generate a trampoline for the parens code to backtrack to, to retry the
884             // next alternative.
885             state.setBacktrackGenerated(label());
886             loadFromFrameAndJump(alternativeFrameLocation);
887
888             // FIXME: both of the above hooks are a little inefficient, in that you
889             // may end up trampolining here, just to trampoline back out to the
890             // parentheses code, or vice versa.  We can probably eliminate a jump
891             // by restructuring, but coding this way for now for simplicity during
892             // development.
893
894             successes.link(this);
895         }
896     }
897
898     void generateParenthesesSingle(TermGenerationState& state)
899     {
900         const RegisterID indexTemporary = regT0;
901         PatternTerm& term = state.term();
902         PatternDisjunction* disjunction = term.parentheses.disjunction;
903         ASSERT(term.quantityCount == 1);
904
905         unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
906
907         unsigned parenthesesFrameLocation = term.frameLocation;
908         unsigned alternativeFrameLocation = parenthesesFrameLocation;
909         if (term.quantityType != QuantifierFixedCount)
910             alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
911
912         // optimized case - no capture & no quantifier can be handled in a light-weight manner.
913         if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
914             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
915             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
916             // this expects that any backtracks back out of the parentheses will be in the
917             // parenthesesState's backTrackJumps vector, and that if they need backtracking
918             // they will have set an entry point on the parenthesesState's backtrackLabel.
919             state.propagateBacktrackingFrom(parenthesesState, this);
920         } else {
921             Jump nonGreedySkipParentheses;
922             Label nonGreedyTryParentheses;
923             if (term.quantityType == QuantifierGreedy)
924                 storeToFrame(Imm32(1), parenthesesFrameLocation);
925             else if (term.quantityType == QuantifierNonGreedy) {
926                 storeToFrame(Imm32(0), parenthesesFrameLocation);
927                 nonGreedySkipParentheses = jump();
928                 nonGreedyTryParentheses = label();
929                 storeToFrame(Imm32(1), parenthesesFrameLocation);
930             }
931
932             // store the match start index
933             if (term.invertOrCapture) {
934                 int inputOffset = state.inputOffset() - preCheckedCount;
935                 if (inputOffset) {
936                     move(index, indexTemporary);
937                     add32(Imm32(inputOffset), indexTemporary);
938                     store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
939                 } else
940                     store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
941             }
942
943             // generate the body of the parentheses
944             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
945             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
946
947             // store the match end index
948             if (term.invertOrCapture) {
949                 int inputOffset = state.inputOffset();
950                 if (inputOffset) {
951                     move(index, indexTemporary);
952                     add32(Imm32(state.inputOffset()), indexTemporary);
953                     store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
954                 } else
955                     store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
956             }
957             Jump success = jump();
958
959             // A failure AFTER the parens jumps here
960             Label backtrackFromAfterParens(this);
961
962             if (term.quantityType == QuantifierGreedy) {
963                 // If this is zero we have now tested with both with and without the parens.
964                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
965                 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
966             } else if (term.quantityType == QuantifierNonGreedy) {
967                 // If this is zero we have now tested with both with and without the parens.
968                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
969                 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
970             }
971
972             parenthesesState.plantJumpToBacktrackIfExists(this);
973             // A failure WITHIN the parens jumps here
974             parenthesesState.linkAlternativeBacktracks(this);
975             if (term.invertOrCapture) {
976                 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
977                 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
978             }
979
980             if (term.quantityType == QuantifierGreedy)
981                 storeToFrame(Imm32(0), parenthesesFrameLocation);
982             else
983                 state.jumpToBacktrack(jump(), this);
984
985             state.setBacktrackGenerated(backtrackFromAfterParens);
986             if (term.quantityType == QuantifierNonGreedy)
987                 nonGreedySkipParentheses.link(this);
988             success.link(this);
989         }
990     }
991
992     void generateParentheticalAssertion(TermGenerationState& state)
993     {
994         PatternTerm& term = state.term();
995         PatternDisjunction* disjunction = term.parentheses.disjunction;
996         ASSERT(term.quantityCount == 1);
997         ASSERT(term.quantityType == QuantifierFixedCount);
998
999         unsigned parenthesesFrameLocation = term.frameLocation;
1000         unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
1001
1002         int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
1003
1004         if (term.invertOrCapture) {
1005             // Inverted case
1006             storeToFrame(index, parenthesesFrameLocation);
1007
1008             state.checkedTotal -= countCheckedAfterAssertion;
1009             if (countCheckedAfterAssertion)
1010                 sub32(Imm32(countCheckedAfterAssertion), index);
1011
1012             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1013             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1014             // Success! - which means - Fail!
1015             loadFromFrame(parenthesesFrameLocation, index);
1016             state.jumpToBacktrack(jump(), this);
1017
1018             // And fail means success.
1019             parenthesesState.linkAlternativeBacktracks(this);
1020             loadFromFrame(parenthesesFrameLocation, index);
1021
1022             state.checkedTotal += countCheckedAfterAssertion;
1023         } else {
1024             // Normal case
1025             storeToFrame(index, parenthesesFrameLocation);
1026
1027             state.checkedTotal -= countCheckedAfterAssertion;
1028             if (countCheckedAfterAssertion)
1029                 sub32(Imm32(countCheckedAfterAssertion), index);
1030
1031             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1032             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1033             // Success! - which means - Success!
1034             loadFromFrame(parenthesesFrameLocation, index);
1035             Jump success = jump();
1036
1037             parenthesesState.linkAlternativeBacktracks(this);
1038             loadFromFrame(parenthesesFrameLocation, index);
1039             state.jumpToBacktrack(jump(), this);
1040
1041             success.link(this);
1042
1043             state.checkedTotal += countCheckedAfterAssertion;
1044         }
1045     }
1046
1047     void generateTerm(TermGenerationState& state)
1048     {
1049         PatternTerm& term = state.term();
1050
1051         switch (term.type) {
1052         case PatternTerm::TypeAssertionBOL:
1053             generateAssertionBOL(state);
1054             break;
1055         
1056         case PatternTerm::TypeAssertionEOL:
1057             generateAssertionEOL(state);
1058             break;
1059         
1060         case PatternTerm::TypeAssertionWordBoundary:
1061             generateAssertionWordBoundary(state);
1062             break;
1063         
1064         case PatternTerm::TypePatternCharacter:
1065             switch (term.quantityType) {
1066             case QuantifierFixedCount:
1067                 if (term.quantityCount == 1) {
1068                     if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
1069                         generatePatternCharacterPair(state);
1070                         state.nextTerm();
1071                     } else
1072                         generatePatternCharacterSingle(state);
1073                 } else
1074                     generatePatternCharacterFixed(state);
1075                 break;
1076             case QuantifierGreedy:
1077                 generatePatternCharacterGreedy(state);
1078                 break;
1079             case QuantifierNonGreedy:
1080                 generatePatternCharacterNonGreedy(state);
1081                 break;
1082             }
1083             break;
1084
1085         case PatternTerm::TypeCharacterClass:
1086             switch (term.quantityType) {
1087             case QuantifierFixedCount:
1088                 if (term.quantityCount == 1)
1089                     generateCharacterClassSingle(state);
1090                 else
1091                     generateCharacterClassFixed(state);
1092                 break;
1093             case QuantifierGreedy:
1094                 generateCharacterClassGreedy(state);
1095                 break;
1096             case QuantifierNonGreedy:
1097                 generateCharacterClassNonGreedy(state);
1098                 break;
1099             }
1100             break;
1101
1102         case PatternTerm::TypeBackReference:
1103             ASSERT_NOT_REACHED();
1104             break;
1105
1106         case PatternTerm::TypeForwardReference:
1107             break;
1108
1109         case PatternTerm::TypeParenthesesSubpattern:
1110             ASSERT((term.quantityCount == 1) && !term.parentheses.isCopy); // must fallback to pcre before this point
1111             generateParenthesesSingle(state);
1112             break;
1113
1114         case PatternTerm::TypeParentheticalAssertion:
1115             generateParentheticalAssertion(state);
1116             break;
1117         }
1118     }
1119
1120     void generateDisjunction(PatternDisjunction* disjunction)
1121     {
1122         TermGenerationState state(disjunction, 0);
1123         state.resetAlternative();
1124
1125         // Plant a check to see if there is sufficient input available to run the first alternative.
1126         // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1127         // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1128         // skipped this check.
1129
1130         Label firstAlternative(this);
1131
1132         // check availability for the next alternative
1133         int countCheckedForCurrentAlternative = 0;
1134         int countToCheckForFirstAlternative = 0;
1135         bool hasShorterAlternatives = false;
1136         JumpList notEnoughInputForPreviousAlternative;
1137
1138         if (state.alternativeValid()) {
1139             PatternAlternative* alternative = state.alternative();
1140             countToCheckForFirstAlternative = alternative->m_minimumSize;
1141             state.checkedTotal += countToCheckForFirstAlternative;
1142             if (countToCheckForFirstAlternative)
1143                 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
1144             countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
1145         }
1146
1147         Label firstAlternativeInputChecked(this);
1148
1149         while (state.alternativeValid()) {
1150             // Track whether any alternatives are shorter than the first one.
1151             hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
1152
1153             PatternAlternative* alternative = state.alternative();
1154             optimizeAlternative(alternative);
1155
1156             for (state.resetTerm(); state.termValid(); state.nextTerm())
1157                 generateTerm(state);
1158
1159             // If we get here, the alternative matched.
1160             if (m_pattern.m_body->m_callFrameSize)
1161                 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1162             
1163             ASSERT(index != returnRegister);
1164             if (m_pattern.m_body->m_hasFixedSize) {
1165                 move(index, returnRegister);
1166                 if (alternative->m_minimumSize)
1167                     sub32(Imm32(alternative->m_minimumSize), returnRegister);
1168             } else
1169                 pop(returnRegister);
1170             store32(index, Address(output, 4));
1171             store32(returnRegister, output);
1172
1173             generateReturn();
1174
1175             state.nextAlternative();
1176
1177             // if there are any more alternatives, plant the check for input before looping.
1178             if (state.alternativeValid()) {
1179                 PatternAlternative* nextAlternative = state.alternative();
1180                 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
1181
1182                 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
1183                     // If we get here, there the last input checked failed.
1184                     notEnoughInputForPreviousAlternative.link(this);
1185
1186                     // Check if sufficent input available to run the next alternative 
1187                     notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1188                     // We are now in the correct state to enter the next alternative; this add is only required
1189                     // to mirror and revert operation of the sub32, just below.
1190                     add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1191
1192                     // If we get here, there the last input checked passed.
1193                     state.linkAlternativeBacktracks(this);
1194                     // No need to check if we can run the next alternative, since it is shorter -
1195                     // just update index.
1196                     sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1197                 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
1198                     // If we get here, there the last input checked failed.
1199                     // If there is insufficient input to run the current alternative, and the next alternative is longer,
1200                     // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1201                     // we had checked.
1202                     notEnoughInputForPreviousAlternative.link(this);
1203                     add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
1204                     notEnoughInputForPreviousAlternative.append(jump());
1205
1206                     // The next alternative is longer than the current one; check the difference.
1207                     state.linkAlternativeBacktracks(this);
1208                     notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1209                 } else { // CASE 3: Both alternatives are the same length.
1210                     ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
1211
1212                     // If the next alterative is the same length as this one, then no need to check the input -
1213                     // if there was sufficent input to run the current alternative then there is sufficient
1214                     // input to run the next one; if not, there isn't.
1215                     state.linkAlternativeBacktracks(this);
1216                 }
1217
1218                 state.checkedTotal -= countCheckedForCurrentAlternative;
1219                 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
1220                 state.checkedTotal += countCheckedForCurrentAlternative;
1221             }
1222         }
1223         
1224         // If we get here, all Alternatives failed...
1225
1226         state.checkedTotal -= countCheckedForCurrentAlternative;
1227
1228         // How much more input need there be to be able to retry from the first alternative?
1229         // examples:
1230         //   /yarr_jit/ or /wrec|pcre/
1231         //     In these examples we need check for one more input before looping.
1232         //   /yarr_jit|pcre/
1233         //     In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1234         //     being four longer than the last alternative checked, and another +1 to effectively move
1235         //     the start position along by one).
1236         //   /yarr|rules/ or /wrec|notsomuch/
1237         //     In these examples, provided that there was sufficient input to have just been matching for
1238         //     the second alternative we can loop without checking for available input (since the second
1239         //     alternative is longer than the first).  In the latter example we need to decrement index
1240         //     (by 4) so the start position is only progressed by 1 from the last iteration.
1241         int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
1242
1243         // First, deal with the cases where there was sufficient input to try the last alternative.
1244         if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
1245             state.linkAlternativeBacktracks(this);
1246         else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
1247             state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
1248         else { // no need to check the input, but we do have some bookkeeping to do first.
1249             state.linkAlternativeBacktracks(this);
1250
1251             // Where necessary update our preserved start position.
1252             if (!m_pattern.m_body->m_hasFixedSize) {
1253                 move(index, regT0);
1254                 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1255                 poke(regT0, m_pattern.m_body->m_callFrameSize);
1256             }
1257
1258             // Update index if necessary, and loop (without checking).
1259             if (incrementForNextIter)
1260                 add32(Imm32(incrementForNextIter), index);
1261             jump().linkTo(firstAlternativeInputChecked, this);
1262         }
1263
1264         notEnoughInputForPreviousAlternative.link(this);
1265         // Update our idea of the start position, if we're tracking this.
1266         if (!m_pattern.m_body->m_hasFixedSize) {
1267             if (countCheckedForCurrentAlternative - 1) {
1268                 move(index, regT0);
1269                 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1270                 poke(regT0, m_pattern.m_body->m_callFrameSize);
1271             } else
1272                 poke(index, m_pattern.m_body->m_callFrameSize);
1273         }
1274         // Check if there is sufficent input to run the first alternative again.
1275         jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
1276         // No - insufficent input to run the first alteranative, are there any other alternatives we
1277         // might need to check?  If so, the last check will have left the index incremented by
1278         // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1279         // LESS input is available, to have the effect of just progressing the start position by 1
1280         // from the last iteration.  If this check passes we can just jump up to the check associated
1281         // with the first alternative in the loop.  This is a bit sad, since we'll end up trying the
1282         // first alternative again, and this check will fail (otherwise the check planted just above
1283         // here would have passed).  This is a bit sad, however it saves trying to do something more
1284         // complex here in compilation, and in the common case we should end up coallescing the checks.
1285         //
1286         // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1287         // of the minimum-alternative-lengths.  E.g. if I have two alternatives of length 200 and 150,
1288         // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1289         // is sufficient input to run either alternative (constantly failing).  If there had been only
1290         // one alternative, or if the shorter alternative had come first, we would have terminated
1291         // immediately. :-/
1292         if (hasShorterAlternatives)
1293             jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
1294         // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1295         // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 
1296         // but since we're about to return a failure this doesn't really matter!)
1297
1298         unsigned frameSize = m_pattern.m_body->m_callFrameSize;
1299         if (!m_pattern.m_body->m_hasFixedSize)
1300             ++frameSize;
1301         if (frameSize)
1302             addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
1303
1304         move(Imm32(-1), returnRegister);
1305
1306         generateReturn();
1307     }
1308
1309     void generateEnter()
1310     {
1311 #if CPU(X86_64)
1312         push(X86Registers::ebp);
1313         move(stackPointerRegister, X86Registers::ebp);
1314         push(X86Registers::ebx);
1315 #elif CPU(X86)
1316         push(X86Registers::ebp);
1317         move(stackPointerRegister, X86Registers::ebp);
1318         // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1319         push(X86Registers::ebx);
1320         push(X86Registers::edi);
1321         push(X86Registers::esi);
1322         // load output into edi (2 = saved ebp + return address).
1323     #if COMPILER(MSVC)
1324         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
1325         loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
1326         loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
1327         loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
1328     #else
1329         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
1330     #endif
1331 #elif CPU(ARM)
1332         push(ARMRegisters::r4);
1333         push(ARMRegisters::r5);
1334         push(ARMRegisters::r6);
1335         move(ARMRegisters::r3, output);
1336 #elif CPU(MIPS)
1337         // Do nothing.
1338 #endif
1339     }
1340
1341     void generateReturn()
1342     {
1343 #if CPU(X86_64)
1344         pop(X86Registers::ebx);
1345         pop(X86Registers::ebp);
1346 #elif CPU(X86)
1347         pop(X86Registers::esi);
1348         pop(X86Registers::edi);
1349         pop(X86Registers::ebx);
1350         pop(X86Registers::ebp);
1351 #elif CPU(ARM)
1352         pop(ARMRegisters::r6);
1353         pop(ARMRegisters::r5);
1354         pop(ARMRegisters::r4);
1355 #elif CPU(MIPS)
1356         // Do nothing
1357 #endif
1358         ret();
1359     }
1360
1361 public:
1362     RegexGenerator(RegexPattern& pattern)
1363         : m_pattern(pattern)
1364     {
1365     }
1366
1367     void generate()
1368     {
1369         generateEnter();
1370
1371         // TODO: do I really want this on the stack?
1372         if (!m_pattern.m_body->m_hasFixedSize)
1373             push(index);
1374
1375         if (m_pattern.m_body->m_callFrameSize)
1376             subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1377
1378         generateDisjunction(m_pattern.m_body);
1379     }
1380
1381     void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
1382     {
1383         generate();
1384
1385         LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
1386
1387         for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
1388             patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
1389
1390         jitObject.set(patchBuffer.finalizeCode());
1391     }
1392
1393 private:
1394     RegexPattern& m_pattern;
1395     Vector<AlternativeBacktrackRecord> m_backtrackRecords;
1396 };
1397
1398 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1399 {
1400     RegexPattern pattern(ignoreCase, multiline);
1401
1402     if ((error = compileRegex(patternString, pattern)))
1403         return;
1404
1405     numSubpatterns = pattern.m_numSubpatterns;
1406
1407     if (pattern.m_shouldFallBack) {
1408         JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
1409         JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
1410         jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
1411     } else {
1412         RegexGenerator generator(pattern);
1413         generator.compile(globalData, jitObject);
1414     }
1415 }
1416
1417 }}
1418
1419 #endif
1420
1421
1422
1423
1424