JavaScriptCore:
[WebKit-https.git] / JavaScriptCore / pcre / pcre_exec.cpp
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7                  Originally written by Philip Hazel
8            Copyright (c) 1997-2006 University of Cambridge
9     Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10     Copyright (C) 2007 Eric Seidel <eric@webkit.org>
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
44
45 #include "config.h"
46
47 #include "pcre_internal.h"
48
49 #include <wtf/ASCIICType.h>
50 #include <wtf/Vector.h>
51
52 using namespace WTF;
53
54 #ifdef __GNUC__
55 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
56 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
57 #endif
58
59 /* Avoid warnings on Windows. */
60 #undef min
61 #undef max
62
63 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
64 typedef int ReturnLocation;
65 #else
66 typedef void* ReturnLocation;
67 #endif
68
69 struct MatchFrame {
70     ReturnLocation returnLocation;
71     struct MatchFrame* previousFrame;
72     
73     /* Function arguments that may change */
74     struct {
75         const UChar* subjectPtr;
76         const unsigned char* instructionPtr;
77         int offsetTop;
78         const UChar* subpatternStart;
79     } args;
80     
81     
82     /* PCRE uses "fake" recursion built off of gotos, thus
83      stack-based local variables are not safe to use.  Instead we have to
84      store local variables on the current MatchFrame. */
85     struct {
86         const unsigned char* data;
87         const unsigned char* startOfRepeatingBracket;
88         const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
89         const unsigned char* instructionPtrAtStartOfOnce;
90         
91         int repeatOthercase;
92         
93         int ctype;
94         int fc;
95         int fi;
96         int length;
97         int max;
98         int number;
99         int offset;
100         int saveOffset1;
101         int saveOffset2;
102         int saveOffset3;
103         
104         const UChar* subpatternStart;
105     } locals;
106 };
107
108 /* Structure for passing "static" information around between the functions
109 doing traditional NFA matching, so that they are thread-safe. */
110
111 struct MatchData {
112   int*   offsetVector;         /* Offset vector */
113   int    offsetEnd;            /* One past the end */
114   int    offsetMax;            /* The maximum usable for return data */
115   bool   offsetOverflow;       /* Set if too many extractions */
116   const UChar*  startSubject;         /* Start of the subject string */
117   const UChar*  endSubject;           /* End of the subject string */
118   const UChar*  endMatchPtr;         /* Subject position at end match */
119   int    endOffsetTop;        /* Highwater mark at end of match */
120   bool   multiline;
121   bool   ignoreCase;
122 };
123
124 /* The maximum remaining length of subject we are prepared to search for a
125 req_byte match. */
126
127 #define REQ_BYTE_MAX 1000
128
129 /* The below limit restricts the number of "recursive" match calls in order to
130 avoid spending exponential time on complex regular expressions. */
131
132 static const unsigned matchLimit = 100000;
133
134 #ifdef DEBUG
135 /*************************************************
136 *        Debugging function to print chars       *
137 *************************************************/
138
139 /* Print a sequence of chars in printable format, stopping at the end of the
140 subject if the requested.
141
142 Arguments:
143   p           points to characters
144   length      number to print
145   isSubject  true if printing from within md.startSubject
146   md          pointer to matching data block, if isSubject is true
147 */
148
149 static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
150 {
151     if (isSubject && length > md.endSubject - p)
152         length = md.endSubject - p;
153     while (length-- > 0) {
154         int c;
155         if (isprint(c = *(p++)))
156             printf("%c", c);
157         else if (c < 256)
158             printf("\\x%02x", c);
159         else
160             printf("\\x{%x}", c);
161     }
162 }
163 #endif
164
165 /*************************************************
166 *          Match a back-reference                *
167 *************************************************/
168
169 /* If a back reference hasn't been set, the length that is passed is greater
170 than the number of characters left in the string, so the match fails.
171
172 Arguments:
173   offset      index into the offset vector
174   subjectPtr        points into the subject
175   length      length to be matched
176   md          points to match data block
177
178 Returns:      true if matched
179 */
180
181 static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
182 {
183     const UChar* p = md.startSubject + md.offsetVector[offset];
184     
185 #ifdef DEBUG
186     if (subjectPtr >= md.endSubject)
187         printf("matching subject <null>");
188     else {
189         printf("matching subject ");
190         pchars(subjectPtr, length, true, md);
191     }
192     printf(" against backref ");
193     pchars(p, length, false, md);
194     printf("\n");
195 #endif
196     
197     /* Always fail if not enough characters left */
198     
199     if (length > md.endSubject - subjectPtr)
200         return false;
201     
202     /* Separate the caselesss case for speed */
203     
204     if (md.ignoreCase) {
205         while (length-- > 0) {
206             UChar c = *p++;
207             int othercase = kjs_pcre_ucp_othercase(c);
208             UChar d = *subjectPtr++;
209             if (c != d && othercase != d)
210                 return false;
211         }
212     }
213     else {
214         while (length-- > 0)
215             if (*p++ != *subjectPtr++)
216                 return false;
217     }
218     
219     return true;
220 }
221
222 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
223
224 /* Use numbered labels and switch statement at the bottom of the match function. */
225
226 #define RMATCH_WHERE(num) num
227 #define RRETURN_LABEL RRETURN_SWITCH
228
229 #else
230
231 /* Use GCC's computed goto extension. */
232
233 /* For one test case this is more than 40% faster than the switch statement.
234 We could avoid the use of the num argument entirely by using local labels,
235 but using it for the GCC case as well as the non-GCC case allows us to share
236 a bit more code and notice if we use conflicting numbers.*/
237
238 #define RMATCH_WHERE(num) &&RRETURN_##num
239 #define RRETURN_LABEL *stack.currentFrame->returnLocation
240
241 #endif
242
243 #define RECURSIVE_MATCH_COMMON(num) \
244     goto RECURSE;\
245     RRETURN_##num: \
246     stack.popCurrentFrame();
247
248 #define RECURSIVE_MATCH(num, ra, rb) \
249     do { \
250         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
251         RECURSIVE_MATCH_COMMON(num) \
252     } while (0)
253
254 #define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
255     do { \
256         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
257         startNewGroup(stack.currentFrame); \
258         RECURSIVE_MATCH_COMMON(num) \
259     } while (0)
260
261 #define RRETURN goto RRETURN_LABEL
262
263 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
264
265 /*************************************************
266 *         Match from current position            *
267 *************************************************/
268
269 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
270 in the subject string, while substringStart holds the value of subjectPtr at the start of the
271 last bracketed group - used for breaking infinite loops matching zero-length
272 strings. This function is called recursively in many circumstances. Whenever it
273 returns a negative (error) response, the outer match() call must also return the
274 same response.
275
276 Arguments:
277    subjectPtr        pointer in subject
278    instructionPtr       position in code
279    offsetTop  current top pointer
280    md          pointer to "static" info for the match
281
282 Returns:       1 if matched          )  these values are >= 0
283                0 if failed to match  )
284                a negative error value if aborted by an error condition
285                  (e.g. stopped by repeated call or recursion limit)
286 */
287
288 static const unsigned FRAMES_ON_STACK = 16;
289
290 struct MatchStack {
291     MatchStack()
292         : framesEnd(frames + FRAMES_ON_STACK)
293         , currentFrame(frames)
294         , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
295     {
296         ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
297     }
298     
299     MatchFrame frames[FRAMES_ON_STACK];
300     MatchFrame* framesEnd;
301     MatchFrame* currentFrame;
302     unsigned size;
303     
304     inline bool canUseStackBufferForNextFrame()
305     {
306         return size < FRAMES_ON_STACK;
307     }
308     
309     inline MatchFrame* allocateNextFrame()
310     {
311         if (canUseStackBufferForNextFrame())
312             return currentFrame + 1;
313         return new MatchFrame;
314     }
315     
316     inline void pushNewFrame(const unsigned char* instructionPtr, const UChar* subpatternStart, ReturnLocation returnLocation)
317     {
318         MatchFrame* newframe = allocateNextFrame();
319         newframe->previousFrame = currentFrame;
320
321         newframe->args.subjectPtr = currentFrame->args.subjectPtr;
322         newframe->args.offsetTop = currentFrame->args.offsetTop;
323         newframe->args.instructionPtr = instructionPtr;
324         newframe->args.subpatternStart = subpatternStart;
325         newframe->returnLocation = returnLocation;
326         size++;
327
328         currentFrame = newframe;
329     }
330     
331     inline void popCurrentFrame()
332     {
333         MatchFrame* oldFrame = currentFrame;
334         currentFrame = currentFrame->previousFrame;
335         if (size > FRAMES_ON_STACK)
336             delete oldFrame;
337         size--;
338     }
339
340     void popAllFrames()
341     {
342         while (size)
343             popCurrentFrame();
344     }
345 };
346
347 static int matchError(int errorCode, MatchStack& stack)
348 {
349     stack.popAllFrames();
350     return errorCode;
351 }
352
353 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
354  if there are extra bytes. This is called when we know we are in UTF-8 mode. */
355
356 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
357 {
358     c = *subjectPtr;
359     if ((c & 0xc0) == 0xc0) {
360         int gcaa = kjs_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
361         int gcss = 6 * gcaa;
362         c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss;
363         for (int gcii = 1; gcii <= gcaa; gcii++) {
364             gcss -= 6;
365             c |= (subjectPtr[gcii] & 0x3f) << gcss;
366         }
367         len += gcaa;
368     }
369 }
370
371 static inline void startNewGroup(MatchFrame* currentFrame)
372 {
373     /* At the start of a bracketed group, add the current subject pointer to the
374      stack of such pointers, to be re-instated at the end of the group when we hit
375      the closing ket. When match() is called in other circumstances, we don't add to
376      this stack. */
377     
378     currentFrame->locals.subpatternStart = currentFrame->args.subpatternStart;
379 }
380
381 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
382 static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
383 {
384     // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
385     static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
386     static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
387
388     ASSERT(instructionOffset >= 0);
389     ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
390
391     minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
392     minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
393     maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
394 }
395
396 static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
397 {
398     bool isMatch = false;
399     int min;
400     bool minimize = false; /* Initialization not really needed, but some compilers think so. */
401     unsigned matchCount = 0;
402     
403     MatchStack stack;
404
405     /* The opcode jump table. */
406 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
407 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
408     static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
409 #undef EMIT_JUMP_TABLE_ENTRY
410 #endif
411     
412     /* One-time setup of the opcode jump table. */
413 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
414     for (int i = 255; !opcodeJumpTable[i]; i--)
415         opcodeJumpTable[i] = &&CAPTURING_BRACKET;
416 #endif
417     
418 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
419     // Shark shows this as a hot line
420     // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
421     stack.currentFrame->returnLocation = &&RETURN;
422 #else
423     stack.currentFrame->returnLocation = 0;
424 #endif
425     stack.currentFrame->args.subjectPtr = subjectPtr;
426     stack.currentFrame->args.instructionPtr = instructionPtr;
427     stack.currentFrame->args.offsetTop = offsetTop;
428     stack.currentFrame->args.subpatternStart = 0;
429     startNewGroup(stack.currentFrame);
430     
431     /* This is where control jumps back to to effect "recursion" */
432     
433 RECURSE:
434     if (++matchCount > matchLimit)
435         return matchError(JSRegExpErrorHitLimit, stack);
436
437     /* Now start processing the operations. */
438     
439 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
440     while (true)
441 #endif
442     {
443         
444 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
445 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
446 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
447 #else
448 #define BEGIN_OPCODE(opcode) case OP_##opcode
449 #define NEXT_OPCODE continue
450 #endif
451         
452 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
453         NEXT_OPCODE;
454 #else
455         switch (*stack.currentFrame->args.instructionPtr)
456 #endif
457         {
458             /* Non-capturing bracket: optimized */
459                 
460             BEGIN_OPCODE(BRA):
461             NON_CAPTURING_BRACKET:
462                 DPRINTF(("start bracket 0\n"));
463                 do {
464                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
465                     if (isMatch)
466                         RRETURN;
467                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
468                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
469                 DPRINTF(("bracket 0 failed\n"));
470                 RRETURN;
471                 
472             /* Skip over large extraction number data if encountered. */
473                 
474             BEGIN_OPCODE(BRANUMBER):
475                 stack.currentFrame->args.instructionPtr += 3;
476                 NEXT_OPCODE;
477                 
478             /* End of the pattern. */
479                 
480             BEGIN_OPCODE(END):
481                 md.endMatchPtr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */
482                 md.endOffsetTop = stack.currentFrame->args.offsetTop;   /* and how many extracts were taken */
483                 isMatch = true;
484                 RRETURN;
485                 
486             /* Assertion brackets. Check the alternative branches in turn - the
487              matching won't pass the KET for an assertion. If any one branch matches,
488              the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
489              start of each branch to move the current point backwards, so the code at
490              this level is identical to the lookahead case. */
491                 
492             BEGIN_OPCODE(ASSERT):
493                 do {
494                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
495                     if (isMatch)
496                         break;
497                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
498                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
499                 if (*stack.currentFrame->args.instructionPtr == OP_KET)
500                     RRETURN_NO_MATCH;
501                 
502                 /* Continue from after the assertion, updating the offsets high water
503                  mark, since extracts may have been taken during the assertion. */
504                 
505                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
506                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
507                 stack.currentFrame->args.offsetTop = md.endOffsetTop;
508                 NEXT_OPCODE;
509                 
510             /* Negative assertion: all branches must fail to match */
511                 
512             BEGIN_OPCODE(ASSERT_NOT):
513                 do {
514                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
515                     if (isMatch)
516                         RRETURN_NO_MATCH;
517                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
518                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
519                 
520                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
521                 NEXT_OPCODE;
522                 
523             /* An alternation is the end of a branch; scan along to find the end of the
524              bracketed group and go to there. */
525                 
526             BEGIN_OPCODE(ALT):
527                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
528                 NEXT_OPCODE;
529                 
530             /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
531              that it may occur zero times. It may repeat infinitely, or not at all -
532              i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
533              repeat limits are compiled as a number of copies, with the optional ones
534              preceded by BRAZERO or BRAMINZERO. */
535                 
536             BEGIN_OPCODE(BRAZERO): {
537                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
538                 RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.subpatternStart);
539                 if (isMatch)
540                     RRETURN;
541                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
542                 stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
543                 NEXT_OPCODE;
544             }
545                 
546             BEGIN_OPCODE(BRAMINZERO): {
547                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
548                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
549                 RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
550                 if (isMatch)
551                     RRETURN;
552                 stack.currentFrame->args.instructionPtr++;
553                 NEXT_OPCODE;
554             }
555                 
556             /* End of a group, repeated or non-repeating. If we are at the end of
557              an assertion "group", stop matching and return 1, but record the
558              current high water mark for use by positive assertions. Do this also
559              for the "once" (not-backup up) groups. */
560                 
561             BEGIN_OPCODE(KET):
562             BEGIN_OPCODE(KETRMIN):
563             BEGIN_OPCODE(KETRMAX):
564                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
565                 stack.currentFrame->args.subpatternStart = stack.currentFrame->locals.subpatternStart;
566                 stack.currentFrame->locals.subpatternStart = stack.currentFrame->previousFrame->args.subpatternStart;
567
568                 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
569                     md.endOffsetTop = stack.currentFrame->args.offsetTop;
570                     isMatch = true;
571                     RRETURN;
572                 }
573                 
574                 /* In all other cases except a conditional group we have to check the
575                  group number back at the start and if necessary complete handling an
576                  extraction by setting the offsets and bumping the high water mark. */
577                 
578                 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
579                 
580                 /* For extended extraction brackets (large number), we have to fish out
581                  the number from a dummy opcode at the start. */
582                 
583                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
584                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
585                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
586                 
587 #ifdef DEBUG
588                 printf("end bracket %d", stack.currentFrame->locals.number);
589                 printf("\n");
590 #endif
591                 
592                 /* Test for a numbered group. This includes groups called as a result
593                  of recursion. Note that whole-pattern recursion is coded as a recurse
594                  into group 0, so it won't be picked up here. Instead, we catch it when
595                  the OP_END is reached. */
596                 
597                 if (stack.currentFrame->locals.number > 0) {
598                     if (stack.currentFrame->locals.offset >= md.offsetMax)
599                         md.offsetOverflow = true;
600                     else {
601                         md.offsetVector[stack.currentFrame->locals.offset] =
602                         md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
603                         md.offsetVector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.startSubject;
604                         if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
605                             stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
606                     }
607                 }
608                 
609                 /* For a non-repeating ket, just continue at this level. This also
610                  happens for a repeating ket if no characters were matched in the group.
611                  This is the forcible breaking of infinite loops as implemented in Perl
612                  5.005. If there is an options reset, it will get obeyed in the normal
613                  course of events. */
614                 
615                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
616                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
617                     NEXT_OPCODE;
618                 }
619                 
620                 /* The repeating kets try the rest of the pattern or restart from the
621                  preceding bracket, in the appropriate order. */
622                 
623                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
624                     RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
625                     if (isMatch)
626                         RRETURN;
627                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
628                     if (isMatch)
629                         RRETURN;
630                 } else { /* OP_KETRMAX */
631                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
632                     if (isMatch)
633                         RRETURN;
634                     RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
635                     if (isMatch)
636                         RRETURN;
637                 }
638                 RRETURN;
639                 
640             /* Start of subject, or after internal newline if multiline. */
641                 
642             BEGIN_OPCODE(CIRC):
643                 if (stack.currentFrame->args.subjectPtr != md.startSubject && (!md.multiline || !isNewline(stack.currentFrame->args.subjectPtr[-1])))
644                     RRETURN_NO_MATCH;
645                 stack.currentFrame->args.instructionPtr++;
646                 NEXT_OPCODE;
647                 
648             /* End of subject, or before internal newline if multiline. */
649                 
650             BEGIN_OPCODE(DOLL):
651                 if (stack.currentFrame->args.subjectPtr < md.endSubject && (!md.multiline || !isNewline(*stack.currentFrame->args.subjectPtr)))
652                     RRETURN_NO_MATCH;
653                 stack.currentFrame->args.instructionPtr++;
654                 NEXT_OPCODE;
655                 
656             /* Word boundary assertions */
657                 
658             BEGIN_OPCODE(NOT_WORD_BOUNDARY):
659             BEGIN_OPCODE(WORD_BOUNDARY): {
660                 bool currentCharIsWordChar = false;
661                 bool previousCharIsWordChar = false;
662                 
663                 if (stack.currentFrame->args.subjectPtr > md.startSubject)
664                     previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
665                 if (stack.currentFrame->args.subjectPtr < md.endSubject)
666                     currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
667                 
668                 /* Now see if the situation is what we want */
669                 bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
670                 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
671                     RRETURN_NO_MATCH;
672                 NEXT_OPCODE;
673             }
674                 
675             /* Match a single character type; inline for speed */
676                 
677             BEGIN_OPCODE(NOT_NEWLINE):
678                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
679                     RRETURN_NO_MATCH;
680                 if (isNewline(*stack.currentFrame->args.subjectPtr++))
681                     RRETURN_NO_MATCH;
682                 stack.currentFrame->args.instructionPtr++;
683                 NEXT_OPCODE;
684
685             BEGIN_OPCODE(NOT_DIGIT):
686                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
687                     RRETURN_NO_MATCH;
688                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
689                     RRETURN_NO_MATCH;
690                 stack.currentFrame->args.instructionPtr++;
691                 NEXT_OPCODE;
692
693             BEGIN_OPCODE(DIGIT):
694                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
695                     RRETURN_NO_MATCH;
696                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
697                     RRETURN_NO_MATCH;
698                 stack.currentFrame->args.instructionPtr++;
699                 NEXT_OPCODE;
700
701             BEGIN_OPCODE(NOT_WHITESPACE):
702                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
703                     RRETURN_NO_MATCH;
704                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
705                     RRETURN_NO_MATCH;
706                 stack.currentFrame->args.instructionPtr++;
707                 NEXT_OPCODE;
708
709             BEGIN_OPCODE(WHITESPACE):
710                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
711                     RRETURN_NO_MATCH;
712                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
713                     RRETURN_NO_MATCH;
714                 stack.currentFrame->args.instructionPtr++;
715                 NEXT_OPCODE;
716                 
717             BEGIN_OPCODE(NOT_WORDCHAR):
718                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
719                     RRETURN_NO_MATCH;
720                 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
721                     RRETURN_NO_MATCH;
722                 stack.currentFrame->args.instructionPtr++;
723                 NEXT_OPCODE;
724                 
725             BEGIN_OPCODE(WORDCHAR):
726                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
727                     RRETURN_NO_MATCH;
728                 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
729                     RRETURN_NO_MATCH;
730                 stack.currentFrame->args.instructionPtr++;
731                 NEXT_OPCODE;
732                 
733             /* Match a back reference, possibly repeatedly. Look past the end of the
734              item to see if there is repeat information following. The code is similar
735              to that for character classes, but repeated for efficiency. Then obey
736              similar code to character type repeats - written out again for speed.
737              However, if the referenced string is the empty string, always treat
738              it as matched, any number of times (otherwise there could be infinite
739              loops). */
740                 
741             BEGIN_OPCODE(REF):
742                 stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1;               /* Doubled ref number */
743                 stack.currentFrame->args.instructionPtr += 3;                                 /* Advance past item */
744                 
745                 /* If the reference is unset, set the length to be longer than the amount
746                  of subject left; this ensures that every attempt at a match fails. We
747                  can't just fail here, because of the possibility of quantifiers with zero
748                  minima. */
749                 
750                 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
751                     stack.currentFrame->locals.length = 0;
752                 else
753                     stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
754                 
755                 /* Set up for repetition, or handle the non-repeated case */
756                 
757                 switch (*stack.currentFrame->args.instructionPtr) {
758                     case OP_CRSTAR:
759                     case OP_CRMINSTAR:
760                     case OP_CRPLUS:
761                     case OP_CRMINPLUS:
762                     case OP_CRQUERY:
763                     case OP_CRMINQUERY:
764                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
765                         break;
766                         
767                     case OP_CRRANGE:
768                     case OP_CRMINRANGE:
769                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
770                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
771                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
772                         if (stack.currentFrame->locals.max == 0)
773                             stack.currentFrame->locals.max = INT_MAX;
774                         stack.currentFrame->args.instructionPtr += 5;
775                         break;
776                     
777                     default:               /* No repeat follows */
778                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
779                             RRETURN_NO_MATCH;
780                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
781                         NEXT_OPCODE;
782                 }
783                 
784                 /* If the length of the reference is zero, just continue with the
785                  main loop. */
786                 
787                 if (stack.currentFrame->locals.length == 0)
788                     NEXT_OPCODE;
789                 
790                 /* First, ensure the minimum number of matches are present. */
791                 
792                 for (int i = 1; i <= min; i++) {
793                     if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
794                         RRETURN_NO_MATCH;
795                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
796                 }
797                 
798                 /* If min = max, continue at the same level without recursion.
799                  They are not both allowed to be zero. */
800                 
801                 if (min == stack.currentFrame->locals.max)
802                     NEXT_OPCODE;
803                 
804                 /* If minimizing, keep trying and advancing the pointer */
805                 
806                 if (minimize) {
807                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
808                         RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
809                         if (isMatch)
810                             RRETURN;
811                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
812                             RRETURN;
813                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
814                     }
815                     /* Control never reaches here */
816                 }
817                 
818                 /* If maximizing, find the longest string and work backwards */
819                 
820                 else {
821                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
822                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
823                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
824                             break;
825                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
826                     }
827                     while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
828                         RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
829                         if (isMatch)
830                             RRETURN;
831                         stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
832                     }
833                     RRETURN_NO_MATCH;
834                 }
835                 /* Control never reaches here */
836                 
837             /* Match a bit-mapped character class, possibly repeatedly. This op code is
838              used when all the characters in the class have values in the range 0-255,
839              and either the matching is caseful, or the characters are in the range
840              0-127 when UTF-8 processing is enabled. The only difference between
841              OP_CLASS and OP_NCLASS occurs when a data character outside the range is
842              encountered.
843              
844              First, look past the end of the item to see if there is repeat information
845              following. Then obey similar code to character type repeats - written out
846              again for speed. */
847                 
848             BEGIN_OPCODE(NCLASS):
849             BEGIN_OPCODE(CLASS):
850                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1;                /* Save for matching */
851                 stack.currentFrame->args.instructionPtr += 33;                     /* Advance past the item */
852                 
853                 switch (*stack.currentFrame->args.instructionPtr) {
854                     case OP_CRSTAR:
855                     case OP_CRMINSTAR:
856                     case OP_CRPLUS:
857                     case OP_CRMINPLUS:
858                     case OP_CRQUERY:
859                     case OP_CRMINQUERY:
860                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
861                         break;
862                         
863                     case OP_CRRANGE:
864                     case OP_CRMINRANGE:
865                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
866                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
867                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
868                         if (stack.currentFrame->locals.max == 0)
869                             stack.currentFrame->locals.max = INT_MAX;
870                         stack.currentFrame->args.instructionPtr += 5;
871                         break;
872                         
873                     default:               /* No repeat follows */
874                         min = stack.currentFrame->locals.max = 1;
875                         break;
876                 }
877                 
878                 /* First, ensure the minimum number of matches are present. */
879                 
880                 for (int i = 1; i <= min; i++) {
881                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
882                         RRETURN_NO_MATCH;
883                     int c = *stack.currentFrame->args.subjectPtr++;
884                     if (c > 255) {
885                         if (stack.currentFrame->locals.data[-1] == OP_CLASS)
886                             RRETURN_NO_MATCH;
887                     } else {
888                         if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
889                             RRETURN_NO_MATCH;
890                     }
891                 }
892                 
893                 /* If max == min we can continue with the main loop without the
894                  need to recurse. */
895                 
896                 if (min == stack.currentFrame->locals.max)
897                     NEXT_OPCODE;      
898                 
899                 /* If minimizing, keep testing the rest of the expression and advancing
900                  the pointer while it matches the class. */
901                 if (minimize) {
902                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
903                         RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
904                         if (isMatch)
905                             RRETURN;
906                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
907                             RRETURN;
908                         int c = *stack.currentFrame->args.subjectPtr++;
909                         if (c > 255) {
910                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
911                                 RRETURN;
912                         } else {
913                             if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
914                                 RRETURN;
915                         }
916                     }
917                     /* Control never reaches here */
918                 }
919                 /* If maximizing, find the longest possible run, then work backwards. */
920                 else {
921                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
922                     
923                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
924                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
925                             break;
926                         int c = *stack.currentFrame->args.subjectPtr;
927                         if (c > 255) {
928                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
929                                 break;
930                         } else {
931                             if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
932                                 break;
933                         }
934                         ++stack.currentFrame->args.subjectPtr;
935                     }
936                     for (;;) {
937                         RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
938                         if (isMatch)
939                             RRETURN;
940                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
941                             break;        /* Stop if tried at original pos */
942                     }
943                     
944                     RRETURN;
945                 }
946                 /* Control never reaches here */
947                 
948             /* Match an extended character class. */
949                 
950             BEGIN_OPCODE(XCLASS):
951                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE;                /* Save for matching */
952                 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);                      /* Advance past the item */
953                 
954                 switch (*stack.currentFrame->args.instructionPtr) {
955                     case OP_CRSTAR:
956                     case OP_CRMINSTAR:
957                     case OP_CRPLUS:
958                     case OP_CRMINPLUS:
959                     case OP_CRQUERY:
960                     case OP_CRMINQUERY:
961                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
962                         break;
963                         
964                     case OP_CRRANGE:
965                     case OP_CRMINRANGE:
966                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
967                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
968                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
969                         if (stack.currentFrame->locals.max == 0)
970                             stack.currentFrame->locals.max = INT_MAX;
971                         stack.currentFrame->args.instructionPtr += 5;
972                         break;
973                         
974                     default:               /* No repeat follows */
975                         min = stack.currentFrame->locals.max = 1;
976                 }
977                 
978                 /* First, ensure the minimum number of matches are present. */
979                 
980                 for (int i = 1; i <= min; i++) {
981                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
982                         RRETURN_NO_MATCH;
983                     int c = *stack.currentFrame->args.subjectPtr++;
984                     if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
985                         RRETURN_NO_MATCH;
986                 }
987                 
988                 /* If max == min we can continue with the main loop without the
989                  need to recurse. */
990                 
991                 if (min == stack.currentFrame->locals.max)
992                     NEXT_OPCODE;
993                 
994                 /* If minimizing, keep testing the rest of the expression and advancing
995                  the pointer while it matches the class. */
996                 
997                 if (minimize) {
998                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
999                         RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1000                         if (isMatch)
1001                             RRETURN;
1002                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1003                             RRETURN;
1004                         int c = *stack.currentFrame->args.subjectPtr++;
1005                         if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
1006                             RRETURN;
1007                     }
1008                     /* Control never reaches here */
1009                 }
1010                 
1011                 /* If maximizing, find the longest possible run, then work backwards. */
1012                 
1013                 else {
1014                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1015                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
1016                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1017                             break;
1018                         int c = *stack.currentFrame->args.subjectPtr;
1019                         if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
1020                             break;
1021                         ++stack.currentFrame->args.subjectPtr;
1022                     }
1023                     for(;;) {
1024                         RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1025                         if (isMatch)
1026                             RRETURN;
1027                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1028                             break;        /* Stop if tried at original pos */
1029                     }
1030                     RRETURN;
1031                 }
1032                 
1033                 /* Control never reaches here */
1034                 
1035             /* Match a single character, casefully */
1036                 
1037             BEGIN_OPCODE(CHAR):
1038                 stack.currentFrame->locals.length = 1;
1039                 stack.currentFrame->args.instructionPtr++;
1040                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1041                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1042                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1043                     RRETURN_NO_MATCH;
1044                 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
1045                     RRETURN_NO_MATCH;
1046                 NEXT_OPCODE;
1047                 
1048             /* Match a single character, caselessly */
1049                 
1050             BEGIN_OPCODE(CHAR_IGNORING_CASE): {
1051                 stack.currentFrame->locals.length = 1;
1052                 stack.currentFrame->args.instructionPtr++;
1053                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1054                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1055                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1056                     RRETURN_NO_MATCH;
1057                 int dc = *stack.currentFrame->args.subjectPtr++;
1058                 if (stack.currentFrame->locals.fc != dc && kjs_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
1059                     RRETURN_NO_MATCH;
1060                 NEXT_OPCODE;
1061             }
1062                 
1063             /* Match a single ASCII character. */
1064                 
1065             BEGIN_OPCODE(ASCII_CHAR):
1066                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1067                     RRETURN_NO_MATCH;
1068                 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
1069                     RRETURN_NO_MATCH;
1070                 ++stack.currentFrame->args.subjectPtr;
1071                 stack.currentFrame->args.instructionPtr += 2;
1072                 NEXT_OPCODE;
1073                 
1074             /* Match one of two cases of an ASCII letter. */
1075                 
1076             BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
1077                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1078                     RRETURN_NO_MATCH;
1079                 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
1080                     RRETURN_NO_MATCH;
1081                 ++stack.currentFrame->args.subjectPtr;
1082                 stack.currentFrame->args.instructionPtr += 2;
1083                 NEXT_OPCODE;
1084                 
1085             /* Match a single character repeatedly; different opcodes share code. */
1086                 
1087             BEGIN_OPCODE(EXACT):
1088                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1089                 minimize = false;
1090                 stack.currentFrame->args.instructionPtr += 3;
1091                 goto REPEATCHAR;
1092                 
1093             BEGIN_OPCODE(UPTO):
1094             BEGIN_OPCODE(MINUPTO):
1095                 min = 0;
1096                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1097                 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
1098                 stack.currentFrame->args.instructionPtr += 3;
1099                 goto REPEATCHAR;
1100                 
1101             BEGIN_OPCODE(STAR):
1102             BEGIN_OPCODE(MINSTAR):
1103             BEGIN_OPCODE(PLUS):
1104             BEGIN_OPCODE(MINPLUS):
1105             BEGIN_OPCODE(QUERY):
1106             BEGIN_OPCODE(MINQUERY):
1107                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
1108                 
1109                 /* Common code for all repeated single-character matches. We can give
1110                  up quickly if there are fewer than the minimum number of characters left in
1111                  the subject. */
1112                 
1113             REPEATCHAR:
1114                 
1115                 stack.currentFrame->locals.length = 1;
1116                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1117                 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
1118                     RRETURN_NO_MATCH;
1119                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1120                 
1121                 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1122                     int othercase = md.ignoreCase ? kjs_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
1123                     
1124                     for (int i = 1; i <= min; i++) {
1125                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1126                             RRETURN_NO_MATCH;
1127                         ++stack.currentFrame->args.subjectPtr;
1128                     }
1129                     
1130                     if (min == stack.currentFrame->locals.max)
1131                         NEXT_OPCODE;
1132                     
1133                     if (minimize) {
1134                         stack.currentFrame->locals.repeatOthercase = othercase;
1135                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1136                             RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1137                             if (isMatch)
1138                                 RRETURN;
1139                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1140                                 RRETURN;
1141                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
1142                                 RRETURN;
1143                             ++stack.currentFrame->args.subjectPtr;
1144                         }
1145                         /* Control never reaches here */
1146                     } else {
1147                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1148                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1149                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1150                                 break;
1151                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1152                                 break;
1153                             ++stack.currentFrame->args.subjectPtr;
1154                         }
1155                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1156                             RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1157                             if (isMatch)
1158                                 RRETURN;
1159                             --stack.currentFrame->args.subjectPtr;
1160                         }
1161                         RRETURN_NO_MATCH;
1162                     }
1163                     /* Control never reaches here */
1164                 } else {
1165                     /* No case on surrogate pairs, so no need to bother with "othercase". */
1166                     
1167                     for (int i = 1; i <= min; i++) {
1168                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1169                             RRETURN_NO_MATCH;
1170                         stack.currentFrame->args.subjectPtr += 2;
1171                     }
1172                     
1173                     if (min == stack.currentFrame->locals.max)
1174                         NEXT_OPCODE;
1175                     
1176                     if (minimize) {
1177                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1178                             RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1179                             if (isMatch)
1180                                 RRETURN;
1181                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1182                                 RRETURN;
1183                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1184                                 RRETURN;
1185                             stack.currentFrame->args.subjectPtr += 2;
1186                         }
1187                         /* Control never reaches here */
1188                     } else {
1189                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1190                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1191                             if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
1192                                 break;
1193                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1194                                 break;
1195                             stack.currentFrame->args.subjectPtr += 2;
1196                         }
1197                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1198                             RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1199                             if (isMatch)
1200                                 RRETURN;
1201                             stack.currentFrame->args.subjectPtr -= 2;
1202                         }
1203                         RRETURN_NO_MATCH;
1204                     }
1205                     /* Control never reaches here */
1206                 }
1207                 /* Control never reaches here */
1208                 
1209             /* Match a negated single one-byte character. */
1210                 
1211             BEGIN_OPCODE(NOT): {
1212                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1213                     RRETURN_NO_MATCH;
1214                 stack.currentFrame->args.instructionPtr++;
1215                 int c = *stack.currentFrame->args.subjectPtr++;
1216                 if (md.ignoreCase) {
1217                     if (c < 128)
1218                         c = toLowerCase(c);
1219                     if (toLowerCase(*stack.currentFrame->args.instructionPtr++) == c)
1220                         RRETURN_NO_MATCH;
1221                 } else {
1222                     if (*stack.currentFrame->args.instructionPtr++ == c)
1223                         RRETURN_NO_MATCH;
1224                 }
1225                 NEXT_OPCODE;
1226             }
1227                 
1228             /* Match a negated single one-byte character repeatedly. This is almost a
1229              repeat of the code for a repeated single character, but I haven't found a
1230              nice way of commoning these up that doesn't require a test of the
1231              positive/negative option for each character match. Maybe that wouldn't add
1232              very much to the time taken, but character matching *is* what this is all
1233              about... */
1234                 
1235             BEGIN_OPCODE(NOTEXACT):
1236                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1237                 minimize = false;
1238                 stack.currentFrame->args.instructionPtr += 3;
1239                 goto REPEATNOTCHAR;
1240                 
1241             BEGIN_OPCODE(NOTUPTO):
1242             BEGIN_OPCODE(NOTMINUPTO):
1243                 min = 0;
1244                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1245                 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
1246                 stack.currentFrame->args.instructionPtr += 3;
1247                 goto REPEATNOTCHAR;
1248                 
1249             BEGIN_OPCODE(NOTSTAR):
1250             BEGIN_OPCODE(NOTMINSTAR):
1251             BEGIN_OPCODE(NOTPLUS):
1252             BEGIN_OPCODE(NOTMINPLUS):
1253             BEGIN_OPCODE(NOTQUERY):
1254             BEGIN_OPCODE(NOTMINQUERY):
1255                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
1256                 
1257             /* Common code for all repeated single-byte matches. We can give up quickly
1258              if there are fewer than the minimum number of bytes left in the
1259              subject. */
1260                 
1261             REPEATNOTCHAR:
1262                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1263                     RRETURN_NO_MATCH;
1264                 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
1265                 
1266                 /* The code is duplicated for the caseless and caseful cases, for speed,
1267                  since matching characters is likely to be quite common. First, ensure the
1268                  minimum number of matches are present. If min = max, continue at the same
1269                  level without recursing. Otherwise, if minimizing, keep trying the rest of
1270                  the expression and advancing one matching character if failing, up to the
1271                  maximum. Alternatively, if maximizing, find the maximum number of
1272                  characters and work backwards. */
1273                 
1274                 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
1275                 
1276                 if (md.ignoreCase) {
1277                     if (stack.currentFrame->locals.fc < 128)
1278                         stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
1279                     
1280                     for (int i = 1; i <= min; i++) {
1281                         int d = *stack.currentFrame->args.subjectPtr++;
1282                         if (d < 128)
1283                             d = toLowerCase(d);
1284                         if (stack.currentFrame->locals.fc == d)
1285                             RRETURN_NO_MATCH;
1286                     }
1287                     
1288                     if (min == stack.currentFrame->locals.max)
1289                         NEXT_OPCODE;      
1290                     
1291                     if (minimize) {
1292                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1293                             RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1294                             if (isMatch)
1295                                 RRETURN;
1296                             int d = *stack.currentFrame->args.subjectPtr++;
1297                             if (d < 128)
1298                                 d = toLowerCase(d);
1299                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
1300                                 RRETURN;
1301                         }
1302                         /* Control never reaches here */
1303                     }
1304                     
1305                     /* Maximize case */
1306                     
1307                     else {
1308                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1309                         
1310                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1311                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1312                                 break;
1313                             int d = *stack.currentFrame->args.subjectPtr;
1314                             if (d < 128)
1315                                 d = toLowerCase(d);
1316                             if (stack.currentFrame->locals.fc == d)
1317                                 break;
1318                             ++stack.currentFrame->args.subjectPtr;
1319                         }
1320                         for (;;) {
1321                             RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1322                             if (isMatch)
1323                                 RRETURN;
1324                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1325                                 break;        /* Stop if tried at original pos */
1326                         }
1327                         
1328                         RRETURN;
1329                     }
1330                     /* Control never reaches here */
1331                 }
1332                 
1333                 /* Caseful comparisons */
1334                 
1335                 else {
1336                     for (int i = 1; i <= min; i++) {
1337                         int d = *stack.currentFrame->args.subjectPtr++;
1338                         if (stack.currentFrame->locals.fc == d)
1339                             RRETURN_NO_MATCH;
1340                     }
1341
1342                     if (min == stack.currentFrame->locals.max)
1343                         NEXT_OPCODE;
1344                     
1345                     if (minimize) {
1346                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1347                             RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1348                             if (isMatch)
1349                                 RRETURN;
1350                             int d = *stack.currentFrame->args.subjectPtr++;
1351                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
1352                                 RRETURN;
1353                         }
1354                         /* Control never reaches here */
1355                     }
1356                     
1357                     /* Maximize case */
1358                     
1359                     else {
1360                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1361                         
1362                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1363                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1364                                 break;
1365                             int d = *stack.currentFrame->args.subjectPtr;
1366                             if (stack.currentFrame->locals.fc == d)
1367                                 break;
1368                             ++stack.currentFrame->args.subjectPtr;
1369                         }
1370                         for (;;) {
1371                             RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1372                             if (isMatch)
1373                                 RRETURN;
1374                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1375                                 break;        /* Stop if tried at original pos */
1376                         }
1377
1378                         RRETURN;
1379                     }
1380                 }
1381                 /* Control never reaches here */
1382                 
1383             /* Match a single character type repeatedly; several different opcodes
1384              share code. This is very similar to the code for single characters, but we
1385              repeat it in the interests of efficiency. */
1386                 
1387             BEGIN_OPCODE(TYPEEXACT):
1388                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1389                 minimize = true;
1390                 stack.currentFrame->args.instructionPtr += 3;
1391                 goto REPEATTYPE;
1392                 
1393             BEGIN_OPCODE(TYPEUPTO):
1394             BEGIN_OPCODE(TYPEMINUPTO):
1395                 min = 0;
1396                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1397                 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
1398                 stack.currentFrame->args.instructionPtr += 3;
1399                 goto REPEATTYPE;
1400                 
1401             BEGIN_OPCODE(TYPESTAR):
1402             BEGIN_OPCODE(TYPEMINSTAR):
1403             BEGIN_OPCODE(TYPEPLUS):
1404             BEGIN_OPCODE(TYPEMINPLUS):
1405             BEGIN_OPCODE(TYPEQUERY):
1406             BEGIN_OPCODE(TYPEMINQUERY):
1407                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
1408                 
1409                 /* Common code for all repeated single character type matches. Note that
1410                  in UTF-8 mode, '.' matches a character of any length, but for the other
1411                  character types, the valid characters are all one-byte long. */
1412                 
1413             REPEATTYPE:
1414                 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++;      /* Code for the character type */
1415                 
1416                 /* First, ensure the minimum number of matches are present. Use inline
1417                  code for maximizing the speed, and do the type test once at the start
1418                  (i.e. keep it out of the loop). Also we can test that there are at least
1419                  the minimum number of characters before we start. */
1420                 
1421                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1422                     RRETURN_NO_MATCH;
1423                 if (min > 0) {
1424                     switch (stack.currentFrame->locals.ctype) {
1425                         case OP_NOT_NEWLINE:
1426                             for (int i = 1; i <= min; i++) {
1427                                 if (isNewline(*stack.currentFrame->args.subjectPtr))
1428                                     RRETURN_NO_MATCH;
1429                                 ++stack.currentFrame->args.subjectPtr;
1430                             }
1431                             break;
1432                             
1433                         case OP_NOT_DIGIT:
1434                             for (int i = 1; i <= min; i++) {
1435                                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1436                                     RRETURN_NO_MATCH;
1437                                 ++stack.currentFrame->args.subjectPtr;
1438                             }
1439                             break;
1440                             
1441                         case OP_DIGIT:
1442                             for (int i = 1; i <= min; i++) {
1443                                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1444                                     RRETURN_NO_MATCH;
1445                                 ++stack.currentFrame->args.subjectPtr;
1446                             }
1447                             break;
1448                             
1449                         case OP_NOT_WHITESPACE:
1450                             for (int i = 1; i <= min; i++) {
1451                                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
1452                                     RRETURN_NO_MATCH;
1453                                 ++stack.currentFrame->args.subjectPtr;
1454                             }
1455                             break;
1456                             
1457                         case OP_WHITESPACE:
1458                             for (int i = 1; i <= min; i++) {
1459                                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
1460                                     RRETURN_NO_MATCH;
1461                                 ++stack.currentFrame->args.subjectPtr;
1462                             }
1463                             break;
1464                             
1465                         case OP_NOT_WORDCHAR:
1466                             for (int i = 1; i <= min; i++) {
1467                                 if (isWordChar(*stack.currentFrame->args.subjectPtr))
1468                                     RRETURN_NO_MATCH;
1469                                 ++stack.currentFrame->args.subjectPtr;
1470                             }
1471                             break;
1472                             
1473                         case OP_WORDCHAR:
1474                             for (int i = 1; i <= min; i++) {
1475                                 if (!isWordChar(*stack.currentFrame->args.subjectPtr))
1476                                     RRETURN_NO_MATCH;
1477                                 ++stack.currentFrame->args.subjectPtr;
1478                             }
1479                             break;
1480                             
1481                         default:
1482                             ASSERT_NOT_REACHED();
1483                             return matchError(JSRegExpErrorInternal, stack);
1484                     }  /* End switch(stack.currentFrame->locals.ctype) */
1485                 }
1486                 
1487                 /* If min = max, continue at the same level without recursing */
1488                 
1489                 if (min == stack.currentFrame->locals.max)
1490                     NEXT_OPCODE;    
1491                 
1492                 /* If minimizing, we have to test the rest of the pattern before each
1493                  subsequent match. */
1494                 
1495                 if (minimize) {
1496                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1497                         RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1498                         if (isMatch)
1499                             RRETURN;
1500                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1501                             RRETURN;
1502                         
1503                         int c = *stack.currentFrame->args.subjectPtr++;
1504                         switch (stack.currentFrame->locals.ctype) {
1505                             case OP_NOT_NEWLINE:
1506                                 if (isNewline(c))
1507                                     RRETURN;
1508                                 break;
1509                                 
1510                             case OP_NOT_DIGIT:
1511                                 if (isASCIIDigit(c))
1512                                     RRETURN;
1513                                 break;
1514                                 
1515                             case OP_DIGIT:
1516                                 if (!isASCIIDigit(c))
1517                                     RRETURN;
1518                                 break;
1519                                 
1520                             case OP_NOT_WHITESPACE:
1521                                 if (isSpaceChar(c))
1522                                     RRETURN;
1523                                 break;
1524                                 
1525                             case OP_WHITESPACE:
1526                                 if (!isSpaceChar(c))
1527                                     RRETURN;
1528                                 break;
1529                                 
1530                             case OP_NOT_WORDCHAR:
1531                                 if (isWordChar(c))
1532                                     RRETURN;
1533                                 break;
1534                                 
1535                             case OP_WORDCHAR:
1536                                 if (!isWordChar(c))
1537                                     RRETURN;
1538                                 break;
1539                                 
1540                             default:
1541                                 ASSERT_NOT_REACHED();
1542                                 return matchError(JSRegExpErrorInternal, stack);
1543                         }
1544                     }
1545                     /* Control never reaches here */
1546                 }
1547                 
1548                 /* If maximizing it is worth using inline code for speed, doing the type
1549                  test once at the start (i.e. keep it out of the loop). */
1550                 
1551                 else {
1552                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;  /* Remember where we started */
1553                     
1554                     switch (stack.currentFrame->locals.ctype) {
1555                         case OP_NOT_NEWLINE:
1556                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1557                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr))
1558                                     break;
1559                                 stack.currentFrame->args.subjectPtr++;
1560                             }
1561                             break;
1562                             
1563                         case OP_NOT_DIGIT:
1564                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1565                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1566                                     break;
1567                                 int c = *stack.currentFrame->args.subjectPtr;
1568                                 if (isASCIIDigit(c))
1569                                     break;
1570                                 ++stack.currentFrame->args.subjectPtr;
1571                             }
1572                             break;
1573                             
1574                         case OP_DIGIT:
1575                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1576                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1577                                     break;
1578                                 int c = *stack.currentFrame->args.subjectPtr;
1579                                 if (!isASCIIDigit(c))
1580                                     break;
1581                                 ++stack.currentFrame->args.subjectPtr;
1582                             }
1583                             break;
1584                             
1585                         case OP_NOT_WHITESPACE:
1586                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1587                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1588                                     break;
1589                                 int c = *stack.currentFrame->args.subjectPtr;
1590                                 if (isSpaceChar(c))
1591                                     break;
1592                                 ++stack.currentFrame->args.subjectPtr;
1593                             }
1594                             break;
1595                             
1596                         case OP_WHITESPACE:
1597                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1598                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1599                                     break;
1600                                 int c = *stack.currentFrame->args.subjectPtr;
1601                                 if (!isSpaceChar(c))
1602                                     break;
1603                                 ++stack.currentFrame->args.subjectPtr;
1604                             }
1605                             break;
1606                             
1607                         case OP_NOT_WORDCHAR:
1608                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1609                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1610                                     break;
1611                                 int c = *stack.currentFrame->args.subjectPtr;
1612                                 if (isWordChar(c))
1613                                     break;
1614                                 ++stack.currentFrame->args.subjectPtr;
1615                             }
1616                             break;
1617                             
1618                         case OP_WORDCHAR:
1619                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1620                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1621                                     break;
1622                                 int c = *stack.currentFrame->args.subjectPtr;
1623                                 if (!isWordChar(c))
1624                                     break;
1625                                 ++stack.currentFrame->args.subjectPtr;
1626                             }
1627                             break;
1628                             
1629                         default:
1630                             ASSERT_NOT_REACHED();
1631                             return matchError(JSRegExpErrorInternal, stack);
1632                     }
1633                     
1634                     /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
1635                     
1636                     for (;;) {
1637                         RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1638                         if (isMatch)
1639                             RRETURN;
1640                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1641                             break;        /* Stop if tried at original pos */
1642                     }
1643                     
1644                     /* Get here if we can't make it match with any permitted repetitions */
1645                     
1646                     RRETURN;
1647                 }
1648                 /* Control never reaches here */
1649                 
1650             BEGIN_OPCODE(CRMINPLUS):
1651             BEGIN_OPCODE(CRMINQUERY):
1652             BEGIN_OPCODE(CRMINRANGE):
1653             BEGIN_OPCODE(CRMINSTAR):
1654             BEGIN_OPCODE(CRPLUS):
1655             BEGIN_OPCODE(CRQUERY):
1656             BEGIN_OPCODE(CRRANGE):
1657             BEGIN_OPCODE(CRSTAR):
1658                 ASSERT_NOT_REACHED();
1659                 return matchError(JSRegExpErrorInternal, stack);
1660                 
1661 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1662             CAPTURING_BRACKET:
1663 #else
1664             default:
1665 #endif
1666                 /* Opening capturing bracket. If there is space in the offset vector, save
1667                  the current subject position in the working slot at the top of the vector. We
1668                  mustn't change the current values of the data slot, because they may be set
1669                  from a previous iteration of this group, and be referred to by a reference
1670                  inside the group.
1671                  
1672                  If the bracket fails to match, we need to restore this value and also the
1673                  values of the final offsets, in case they were set by a previous iteration of
1674                  the same bracket.
1675                  
1676                  If there isn't enough space in the offset vector, treat this as if it were a
1677                  non-capturing bracket. Don't worry about setting the flag for the error case
1678                  here; that is handled in the code for KET. */
1679                 
1680                 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1681                 
1682                 stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
1683                 
1684                 /* For extended extraction brackets (large number), we have to fish out the
1685                  number from a dummy opcode at the start. */
1686                 
1687                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1688                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 2 + LINK_SIZE);
1689                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
1690                 
1691 #ifdef DEBUG
1692                 printf("start bracket %d subject=", stack.currentFrame->locals.number);
1693                 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
1694                 printf("\n");
1695 #endif
1696                 
1697                 if (stack.currentFrame->locals.offset < md.offsetMax) {
1698                     stack.currentFrame->locals.saveOffset1 = md.offsetVector[stack.currentFrame->locals.offset];
1699                     stack.currentFrame->locals.saveOffset2 = md.offsetVector[stack.currentFrame->locals.offset + 1];
1700                     stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
1701                     
1702                     DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.saveOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.saveOffset3));
1703                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
1704                     
1705                     do {
1706                         RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
1707                         if (isMatch)
1708                             RRETURN;
1709                         stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
1710                     } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
1711                     
1712                     DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
1713                     
1714                     md.offsetVector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.saveOffset1;
1715                     md.offsetVector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.saveOffset2;
1716                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.saveOffset3;
1717                     
1718                     RRETURN;
1719                 }
1720                 
1721                 /* Insufficient room for saving captured contents */
1722                 
1723                 goto NON_CAPTURING_BRACKET;
1724         }
1725         
1726         /* Do not stick any code in here without much thought; it is assumed
1727          that "continue" in the code above comes out to here to repeat the main
1728          loop. */
1729         
1730     } /* End of main loop */
1731     
1732     ASSERT_NOT_REACHED();
1733     
1734 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1735     
1736 RRETURN_SWITCH:
1737     switch (stack.currentFrame->returnLocation) {
1738         case 0: goto RETURN;
1739         case 1: goto RRETURN_1;
1740         case 2: goto RRETURN_2;
1741         case 6: goto RRETURN_6;
1742         case 7: goto RRETURN_7;
1743         case 14: goto RRETURN_14;
1744         case 15: goto RRETURN_15;
1745         case 16: goto RRETURN_16;
1746         case 17: goto RRETURN_17;
1747         case 18: goto RRETURN_18;
1748         case 19: goto RRETURN_19;
1749         case 20: goto RRETURN_20;
1750         case 21: goto RRETURN_21;
1751         case 22: goto RRETURN_22;
1752         case 24: goto RRETURN_24;
1753         case 26: goto RRETURN_26;
1754         case 27: goto RRETURN_27;
1755         case 28: goto RRETURN_28;
1756         case 29: goto RRETURN_29;
1757         case 30: goto RRETURN_30;
1758         case 31: goto RRETURN_31;
1759         case 38: goto RRETURN_38;
1760         case 40: goto RRETURN_40;
1761         case 42: goto RRETURN_42;
1762         case 44: goto RRETURN_44;
1763         case 48: goto RRETURN_48;
1764         case 52: goto RRETURN_52;
1765     }
1766     
1767     ASSERT_NOT_REACHED();
1768     return matchError(JSRegExpErrorInternal, stack);
1769     
1770 #endif
1771     
1772 RETURN:
1773     return isMatch;
1774 }
1775
1776
1777 /*************************************************
1778 *         Execute a Regular Expression           *
1779 *************************************************/
1780
1781 /* This function applies a compiled re to a subject string and picks out
1782 portions of the string if it matches. Two elements in the vector are set for
1783 each substring: the offsets to the start and end of the substring.
1784
1785 Arguments:
1786   re              points to the compiled expression
1787   extra_data      points to extra data or is NULL
1788   subject         points to the subject string
1789   length          length of subject string (may contain binary zeros)
1790   start_offset    where to start in the subject string
1791   options         option bits
1792   offsets         points to a vector of ints to be filled in with offsets
1793   offsetcount     the number of elements in the vector
1794
1795 Returns:          > 0 => success; value is the number of elements filled in
1796                   = 0 => success, but offsets is not big enough
1797                    -1 => failed to match
1798                  < -1 => some kind of unexpected problem
1799 */
1800
1801 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
1802 {
1803     // If first_byte is set, try scanning to the first instance of that byte
1804     // no need to try and match against any earlier part of the subject string.
1805     if (first_byte >= 0) {
1806         UChar first_char = first_byte;
1807         if (first_byte_caseless)
1808             while (subjectPtr < endSubject) {
1809                 int c = *subjectPtr;
1810                 if (c > 127)
1811                     break;
1812                 if (toLowerCase(c) == first_char)
1813                     break;
1814                 subjectPtr++;
1815             }
1816         else {
1817             while (subjectPtr < endSubject && *subjectPtr != first_char)
1818                 subjectPtr++;
1819         }
1820     } else if (useMultiLineFirstCharOptimization) {
1821         /* Or to just after \n for a multiline match if possible */
1822         // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
1823         if (subjectPtr > originalSubjectStart) {
1824             while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
1825                 subjectPtr++;
1826         }
1827     }
1828 }
1829
1830 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByte, const UChar*& reqBytePtr)
1831 {
1832     /* If req_byte is set, we know that that character must appear in the subject
1833      for the match to succeed. If the first character is set, req_byte must be
1834      later in the subject; otherwise the test starts at the match point. This
1835      optimization can save a huge amount of backtracking in patterns with nested
1836      unlimited repeats that aren't going to match. Writing separate code for
1837      cased/caseless versions makes it go faster, as does using an autoincrement
1838      and backing off on a match.
1839      
1840      HOWEVER: when the subject string is very, very long, searching to its end can
1841      take a long time, and give bad performance on quite ordinary patterns. This
1842      showed up when somebody was matching /^C/ on a 32-megabyte string... so we
1843      don't do this when the string is sufficiently long.
1844     */
1845
1846     if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
1847         const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
1848
1849         /* We don't need to repeat the search if we haven't yet reached the
1850          place we found it at last time. */
1851
1852         if (p > reqBytePtr) {
1853             if (req_byte_caseless) {
1854                 while (p < endSubject) {
1855                     int pp = *p++;
1856                     if (pp == req_byte || pp == req_byte2) {
1857                         p--;
1858                         break;
1859                     }
1860                 }
1861             } else {
1862                 while (p < endSubject) {
1863                     if (*p++ == req_byte) {
1864                         p--;
1865                         break;
1866                     }
1867                 }
1868             }
1869
1870             /* If we can't find the required character, break the matching loop */
1871
1872             if (p >= endSubject)
1873                 return true;
1874
1875             /* If we have found the required character, save the point where we
1876              found it, so that we don't search again next time round the loop if
1877              the start hasn't passed this character yet. */
1878
1879             reqBytePtr = p;
1880         }
1881     }
1882     return false;
1883 }
1884
1885 int jsRegExpExecute(const JSRegExp* re,
1886                     const UChar* subject, int length, int start_offset, int* offsets,
1887                     int offsetcount)
1888 {
1889     ASSERT(re);
1890     ASSERT(subject);
1891     ASSERT(offsetcount >= 0);
1892     ASSERT(offsets || offsetcount == 0);
1893     
1894     MatchData matchBlock;
1895     matchBlock.startSubject = subject;
1896     matchBlock.endSubject = matchBlock.startSubject + length;
1897     const UChar* endSubject = matchBlock.endSubject;
1898     
1899     matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
1900     matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
1901     
1902     /* If the expression has got more back references than the offsets supplied can
1903      hold, we get a temporary chunk of working store to use during the matching.
1904      Otherwise, we can use the vector supplied, rounding down its size to a multiple
1905      of 3. */
1906     
1907     int ocount = offsetcount - (offsetcount % 3);
1908     
1909     // FIXME: This is lame that we have to second-guess our caller here.
1910     // The API should change to either fail-hard when we don't have enough offset space
1911     // or that we shouldn't ask our callers to pre-allocate in the first place.
1912     bool using_temporary_offsets = false;
1913     if (re->top_backref > 0 && re->top_backref >= ocount/3) {
1914         ocount = re->top_backref * 3 + 3;
1915         matchBlock.offsetVector = new int[ocount];
1916         if (!matchBlock.offsetVector)
1917             return JSRegExpErrorNoMemory;
1918         using_temporary_offsets = true;
1919     } else
1920         matchBlock.offsetVector = offsets;
1921     
1922     matchBlock.offsetEnd = ocount;
1923     matchBlock.offsetMax = (2*ocount)/3;
1924     matchBlock.offsetOverflow = false;
1925     
1926     /* Compute the minimum number of offsets that we need to reset each time. Doing
1927      this makes a huge difference to execution time when there aren't many brackets
1928      in the pattern. */
1929     
1930     int resetcount = 2 + re->top_bracket * 2;
1931     if (resetcount > offsetcount)
1932         resetcount = ocount;
1933     
1934     /* Reset the working variable associated with each extraction. These should
1935      never be used unless previously set, but they get saved and restored, and so we
1936      initialize them to avoid reading uninitialized locations. */
1937     
1938     if (matchBlock.offsetVector) {
1939         int* iptr = matchBlock.offsetVector + ocount;
1940         int* iend = iptr - resetcount/2 + 1;
1941         while (--iptr >= iend)
1942             *iptr = -1;
1943     }
1944     
1945     /* Set up the first character to match, if available. The first_byte value is
1946      never set for an anchored regular expression, but the anchoring may be forced
1947      at run time, so we have to test for anchoring. The first char may be unset for
1948      an unanchored pattern, of course. If there's no first char and the pattern was
1949      studied, there may be a bitmap of possible first characters. */
1950     
1951     bool first_byte_caseless = false;
1952     int first_byte = -1;
1953     if (re->options & UseFirstByteOptimizationOption) {
1954         first_byte = re->first_byte & 255;
1955         if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
1956             first_byte = toLowerCase(first_byte);
1957     }
1958     
1959     /* For anchored or unanchored matches, there may be a "last known required
1960      character" set. */
1961     
1962     bool req_byte_caseless = false;
1963     int req_byte = -1;
1964     int req_byte2 = -1;
1965     if (re->options & UseRequiredByteOptimizationOption) {
1966         req_byte = re->req_byte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
1967         req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
1968         req_byte2 = flipCase(req_byte);
1969     }
1970     
1971     /* Loop for handling unanchored repeated matching attempts; for anchored regexs
1972      the loop runs just once. */
1973     
1974     const UChar* startMatch = subject + start_offset;
1975     const UChar* reqBytePtr = startMatch - 1;
1976     bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
1977     
1978     do {
1979         /* Reset the maximum number of extractions we might see. */
1980         if (matchBlock.offsetVector) {
1981             int* iptr = matchBlock.offsetVector;
1982             int* iend = iptr + resetcount;
1983             while (iptr < iend)
1984                 *iptr++ = -1;
1985         }
1986         
1987         tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
1988         if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_byte2, req_byte_caseless, first_byte >= 0, reqBytePtr))
1989             break;
1990                 
1991         /* When a match occurs, substrings will be set for all internal extractions;
1992          we just need to set up the whole thing as substring 0 before returning. If
1993          there were too many extractions, set the return code to zero. In the case
1994          where we had to get some local store to hold offsets for backreferences, copy
1995          those back references that we can. In this case there need not be overflow
1996          if certain parts of the pattern were not used. */
1997         
1998         /* The code starts after the JSRegExp block and the capture name table. */
1999         const unsigned char* start_code = (const unsigned char*)(re + 1);
2000         
2001         int returnCode = match(startMatch, start_code, 2, matchBlock);
2002         
2003         /* When the result is no match, advance the pointer to the next character
2004          and continue. */
2005         if (returnCode == false) {
2006             startMatch++;
2007             continue;
2008         }
2009
2010         if (returnCode != true) {
2011             ASSERT(returnCode == JSRegExpErrorHitLimit || returnCode == JSRegExpErrorNoMemory);
2012             DPRINTF((">>>> error: returning %d\n", rc));
2013             return returnCode;
2014         }
2015         
2016         /* We have a match! Copy the offset information from temporary store if
2017          necessary */
2018         
2019         if (using_temporary_offsets) {
2020             if (offsetcount >= 4) {
2021                 memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount - 2) * sizeof(int));
2022                 DPRINTF(("Copied offsets from temporary memory\n"));
2023             }
2024             if (matchBlock.endOffsetTop > offsetcount)
2025                 matchBlock.offsetOverflow = true;
2026             
2027             DPRINTF(("Freeing temporary memory\n"));
2028             delete [] matchBlock.offsetVector;
2029         }
2030         
2031         returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
2032         
2033         if (offsetcount < 2)
2034             returnCode = 0;
2035         else {
2036             offsets[0] = startMatch - matchBlock.startSubject;
2037             offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
2038         }
2039         
2040         DPRINTF((">>>> returning %d\n", rc));
2041         return returnCode;
2042     } while (startMatch <= endSubject);
2043     
2044     if (using_temporary_offsets) {
2045         DPRINTF(("Freeing temporary memory\n"));
2046         delete [] matchBlock.offsetVector;
2047     }
2048     
2049     DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2050     return JSRegExpErrorNoMatch;
2051 }