bee77b9c841ecd991ff8f475efcdfb38fc4b9f7f
[WebKit-https.git] / JavaScriptCore / pcre / pcre_exec.cpp
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7                  Originally written by Philip Hazel
8            Copyright (c) 1997-2006 University of Cambridge
9     Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10     Copyright (C) 2007 Eric Seidel <eric@webkit.org>
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
44
45 #include "config.h"
46
47 #include "pcre_internal.h"
48
49 #include <wtf/ASCIICType.h>
50 #include <wtf/Vector.h>
51
52 using namespace WTF;
53
54 #ifdef __GNUC__
55 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
56 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
57 #endif
58
59 /* Avoid warnings on Windows. */
60 #undef min
61 #undef max
62
63 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
64 typedef int ReturnLocation;
65 #else
66 typedef void* ReturnLocation;
67 #endif
68
69 struct MatchFrame {
70     ReturnLocation returnLocation;
71     struct MatchFrame* previousFrame;
72     
73     /* Function arguments that may change */
74     struct {
75         const UChar* subjectPtr;
76         const unsigned char* instructionPtr;
77         int offsetTop;
78         const UChar* subpatternStart;
79     } args;
80     
81     
82     /* PCRE uses "fake" recursion built off of gotos, thus
83      stack-based local variables are not safe to use.  Instead we have to
84      store local variables on the current MatchFrame. */
85     struct {
86         const unsigned char* data;
87         const unsigned char* startOfRepeatingBracket;
88         const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
89         const unsigned char* instructionPtrAtStartOfOnce;
90         
91         int repeatOthercase;
92         
93         int ctype;
94         int fc;
95         int fi;
96         int length;
97         int max;
98         int number;
99         int offset;
100         int saveOffset1;
101         int saveOffset2;
102         int saveOffset3;
103         
104         const UChar* subpatternStart;
105     } locals;
106 };
107
108 /* Structure for passing "static" information around between the functions
109 doing traditional NFA matching, so that they are thread-safe. */
110
111 struct MatchData {
112   int*   offsetVector;         /* Offset vector */
113   int    offsetEnd;            /* One past the end */
114   int    offsetMax;            /* The maximum usable for return data */
115   bool   offsetOverflow;       /* Set if too many extractions */
116   const UChar*  startSubject;         /* Start of the subject string */
117   const UChar*  endSubject;           /* End of the subject string */
118   const UChar*  endMatchPtr;         /* Subject position at end match */
119   int    endOffsetTop;        /* Highwater mark at end of match */
120   bool   multiline;
121   bool   ignoreCase;
122 };
123
124 /* Non-error returns from the match() function. Error returns are externally
125 defined error codes, which are all negative. */
126
127 #define MATCH_MATCH        1
128 #define MATCH_NOMATCH      0
129
130 /* The maximum remaining length of subject we are prepared to search for a
131 req_byte match. */
132
133 #define REQ_BYTE_MAX 1000
134
135 /* The below limit restricts the number of recursive match calls in order to
136 limit the maximum amount of storage.
137  
138 This limit is tied to the size of MatchFrame.  Right now we allow PCRE to allocate up
139 to MATCH_RECURSION_LIMIT - 16 * sizeof(MatchFrame) bytes of "stack" space before we give up.
140 Currently that's 100000 - 16 * (23 * 4)  ~ 90MB. */
141
142 #define MATCH_RECURSION_LIMIT 100000
143
144 #ifdef DEBUG
145 /*************************************************
146 *        Debugging function to print chars       *
147 *************************************************/
148
149 /* Print a sequence of chars in printable format, stopping at the end of the
150 subject if the requested.
151
152 Arguments:
153   p           points to characters
154   length      number to print
155   isSubject  true if printing from within md.startSubject
156   md          pointer to matching data block, if isSubject is true
157 */
158
159 static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
160 {
161     if (isSubject && length > md.endSubject - p)
162         length = md.endSubject - p;
163     while (length-- > 0) {
164         int c;
165         if (isprint(c = *(p++)))
166             printf("%c", c);
167         else if (c < 256)
168             printf("\\x%02x", c);
169         else
170             printf("\\x{%x}", c);
171     }
172 }
173 #endif
174
175 /*************************************************
176 *          Match a back-reference                *
177 *************************************************/
178
179 /* If a back reference hasn't been set, the length that is passed is greater
180 than the number of characters left in the string, so the match fails.
181
182 Arguments:
183   offset      index into the offset vector
184   subjectPtr        points into the subject
185   length      length to be matched
186   md          points to match data block
187
188 Returns:      true if matched
189 */
190
191 static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
192 {
193     const UChar* p = md.startSubject + md.offsetVector[offset];
194     
195 #ifdef DEBUG
196     if (subjectPtr >= md.endSubject)
197         printf("matching subject <null>");
198     else {
199         printf("matching subject ");
200         pchars(subjectPtr, length, true, md);
201     }
202     printf(" against backref ");
203     pchars(p, length, false, md);
204     printf("\n");
205 #endif
206     
207     /* Always fail if not enough characters left */
208     
209     if (length > md.endSubject - subjectPtr)
210         return false;
211     
212     /* Separate the caselesss case for speed */
213     
214     if (md.ignoreCase) {
215         while (length-- > 0) {
216             UChar c = *p++;
217             int othercase = kjs_pcre_ucp_othercase(c);
218             UChar d = *subjectPtr++;
219             if (c != d && othercase != d)
220                 return false;
221         }
222     }
223     else {
224         while (length-- > 0)
225             if (*p++ != *subjectPtr++)
226                 return false;
227     }
228     
229     return true;
230 }
231
232 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
233
234 /* Use numbered labels and switch statement at the bottom of the match function. */
235
236 #define RMATCH_WHERE(num) num
237 #define RRETURN_LABEL RRETURN_SWITCH
238
239 #else
240
241 /* Use GCC's computed goto extension. */
242
243 /* For one test case this is more than 40% faster than the switch statement.
244 We could avoid the use of the num argument entirely by using local labels,
245 but using it for the GCC case as well as the non-GCC case allows us to share
246 a bit more code and notice if we use conflicting numbers.*/
247
248 #define RMATCH_WHERE(num) &&RRETURN_##num
249 #define RRETURN_LABEL *stack.currentFrame->returnLocation
250
251 #endif
252
253 #define RECURSIVE_MATCH_COMMON(num) \
254     if (stack.size >= MATCH_RECURSION_LIMIT) \
255         return matchError(JSRegExpErrorRecursionLimit, stack); \
256     goto RECURSE;\
257     RRETURN_##num: \
258     stack.popCurrentFrame();
259
260 #define RECURSIVE_MATCH(num, ra, rb) \
261     do { \
262         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
263         RECURSIVE_MATCH_COMMON(num) \
264     } while (0)
265
266 #define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
267     do { \
268         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
269         startNewGroup(stack.currentFrame); \
270         RECURSIVE_MATCH_COMMON(num) \
271     } while (0)
272
273 #define RRETURN goto RRETURN_LABEL
274
275 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
276
277 /*************************************************
278 *         Match from current position            *
279 *************************************************/
280
281 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
282 in the subject string, while substringStart holds the value of subjectPtr at the start of the
283 last bracketed group - used for breaking infinite loops matching zero-length
284 strings. This function is called recursively in many circumstances. Whenever it
285 returns a negative (error) response, the outer match() call must also return the
286 same response.
287
288 Arguments:
289    subjectPtr        pointer in subject
290    instructionPtr       position in code
291    offsetTop  current top pointer
292    md          pointer to "static" info for the match
293
294 Returns:       MATCH_MATCH if matched            )  these values are >= 0
295                MATCH_NOMATCH if failed to match  )
296                a negative error value if aborted by an error condition
297                  (e.g. stopped by repeated call or recursion limit)
298 */
299
300 static const unsigned FRAMES_ON_STACK = 16;
301
302 struct MatchStack {
303     MatchStack()
304         : framesEnd(frames + FRAMES_ON_STACK)
305         , currentFrame(frames)
306         , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
307     {
308         ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
309     }
310     
311     MatchFrame frames[FRAMES_ON_STACK];
312     MatchFrame* framesEnd;
313     MatchFrame* currentFrame;
314     unsigned size;
315     
316     inline bool canUseStackBufferForNextFrame()
317     {
318         return size < FRAMES_ON_STACK;
319     }
320     
321     inline MatchFrame* allocateNextFrame()
322     {
323         if (canUseStackBufferForNextFrame())
324             return currentFrame + 1;
325         return new MatchFrame;
326     }
327     
328     inline void pushNewFrame(const unsigned char* instructionPtr, const UChar* subpatternStart, ReturnLocation returnLocation)
329     {
330         MatchFrame* newframe = allocateNextFrame();
331         newframe->previousFrame = currentFrame;
332
333         newframe->args.subjectPtr = currentFrame->args.subjectPtr;
334         newframe->args.offsetTop = currentFrame->args.offsetTop;
335         newframe->args.instructionPtr = instructionPtr;
336         newframe->args.subpatternStart = subpatternStart;
337         newframe->returnLocation = returnLocation;
338         size++;
339
340         currentFrame = newframe;
341     }
342     
343     inline void popCurrentFrame()
344     {
345         MatchFrame* oldFrame = currentFrame;
346         currentFrame = currentFrame->previousFrame;
347         if (size > FRAMES_ON_STACK)
348             delete oldFrame;
349         size--;
350     }
351
352     void popAllFrames()
353     {
354         while (size)
355             popCurrentFrame();
356     }
357 };
358
359 static int matchError(int errorCode, MatchStack& stack)
360 {
361     stack.popAllFrames();
362     return errorCode;
363 }
364
365 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
366  if there are extra bytes. This is called when we know we are in UTF-8 mode. */
367
368 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
369 {
370     c = *subjectPtr;
371     if ((c & 0xc0) == 0xc0) {
372         int gcaa = kjs_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
373         int gcss = 6 * gcaa;
374         c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss;
375         for (int gcii = 1; gcii <= gcaa; gcii++) {
376             gcss -= 6;
377             c |= (subjectPtr[gcii] & 0x3f) << gcss;
378         }
379         len += gcaa;
380     }
381 }
382
383 static inline void startNewGroup(MatchFrame* currentFrame)
384 {
385     /* At the start of a bracketed group, add the current subject pointer to the
386      stack of such pointers, to be re-instated at the end of the group when we hit
387      the closing ket. When match() is called in other circumstances, we don't add to
388      this stack. */
389     
390     currentFrame->locals.subpatternStart = currentFrame->args.subpatternStart;
391 }
392
393 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
394 static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
395 {
396     // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
397     static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
398     static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
399
400     ASSERT(instructionOffset >= 0);
401     ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
402
403     minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
404     minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
405     maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
406 }
407
408 static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
409 {
410     int isMatch = false;
411     int min;
412     bool minimize = false; /* Initialization not really needed, but some compilers think so. */
413     
414     MatchStack stack;
415
416     /* The opcode jump table. */
417 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
418 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
419     static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
420 #undef EMIT_JUMP_TABLE_ENTRY
421 #endif
422     
423     /* One-time setup of the opcode jump table. */
424 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
425     for (int i = 255; !opcodeJumpTable[i]; i--)
426         opcodeJumpTable[i] = &&CAPTURING_BRACKET;
427 #endif
428     
429 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
430     // Shark shows this as a hot line
431     // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
432     stack.currentFrame->returnLocation = &&RETURN;
433 #else
434     stack.currentFrame->returnLocation = 0;
435 #endif
436     stack.currentFrame->args.subjectPtr = subjectPtr;
437     stack.currentFrame->args.instructionPtr = instructionPtr;
438     stack.currentFrame->args.offsetTop = offsetTop;
439     stack.currentFrame->args.subpatternStart = 0;
440     startNewGroup(stack.currentFrame);
441     
442     /* This is where control jumps back to to effect "recursion" */
443     
444 RECURSE:
445
446     /* Now start processing the operations. */
447     
448 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
449     while (true)
450 #endif
451     {
452         
453 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
454 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
455 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
456 #else
457 #define BEGIN_OPCODE(opcode) case OP_##opcode
458 #define NEXT_OPCODE continue
459 #endif
460         
461 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
462         NEXT_OPCODE;
463 #else
464         switch (*stack.currentFrame->args.instructionPtr)
465 #endif
466         {
467             /* Non-capturing bracket: optimized */
468                 
469             BEGIN_OPCODE(BRA):
470             NON_CAPTURING_BRACKET:
471                 DPRINTF(("start bracket 0\n"));
472                 do {
473                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
474                     if (isMatch)
475                         RRETURN;
476                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
477                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
478                 DPRINTF(("bracket 0 failed\n"));
479                 RRETURN;
480                 
481             /* Skip over large extraction number data if encountered. */
482                 
483             BEGIN_OPCODE(BRANUMBER):
484                 stack.currentFrame->args.instructionPtr += 3;
485                 NEXT_OPCODE;
486                 
487             /* End of the pattern. */
488                 
489             BEGIN_OPCODE(END):
490                 md.endMatchPtr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */
491                 md.endOffsetTop = stack.currentFrame->args.offsetTop;   /* and how many extracts were taken */
492                 isMatch = true;
493                 RRETURN;
494                 
495             /* Assertion brackets. Check the alternative branches in turn - the
496              matching won't pass the KET for an assertion. If any one branch matches,
497              the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
498              start of each branch to move the current point backwards, so the code at
499              this level is identical to the lookahead case. */
500                 
501             BEGIN_OPCODE(ASSERT):
502                 do {
503                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
504                     if (isMatch)
505                         break;
506                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
507                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
508                 if (*stack.currentFrame->args.instructionPtr == OP_KET)
509                     RRETURN_NO_MATCH;
510                 
511                 /* Continue from after the assertion, updating the offsets high water
512                  mark, since extracts may have been taken during the assertion. */
513                 
514                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
515                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
516                 stack.currentFrame->args.offsetTop = md.endOffsetTop;
517                 NEXT_OPCODE;
518                 
519             /* Negative assertion: all branches must fail to match */
520                 
521             BEGIN_OPCODE(ASSERT_NOT):
522                 do {
523                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
524                     if (isMatch)
525                         RRETURN_NO_MATCH;
526                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
527                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
528                 
529                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
530                 NEXT_OPCODE;
531                 
532             /* An alternation is the end of a branch; scan along to find the end of the
533              bracketed group and go to there. */
534                 
535             BEGIN_OPCODE(ALT):
536                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
537                 NEXT_OPCODE;
538                 
539             /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
540              that it may occur zero times. It may repeat infinitely, or not at all -
541              i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
542              repeat limits are compiled as a number of copies, with the optional ones
543              preceded by BRAZERO or BRAMINZERO. */
544                 
545             BEGIN_OPCODE(BRAZERO): {
546                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
547                 RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.subpatternStart);
548                 if (isMatch)
549                     RRETURN;
550                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
551                 stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
552                 NEXT_OPCODE;
553             }
554                 
555             BEGIN_OPCODE(BRAMINZERO): {
556                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
557                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
558                 RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
559                 if (isMatch)
560                     RRETURN;
561                 stack.currentFrame->args.instructionPtr++;
562                 NEXT_OPCODE;
563             }
564                 
565             /* End of a group, repeated or non-repeating. If we are at the end of
566              an assertion "group", stop matching and return MATCH_MATCH, but record the
567              current high water mark for use by positive assertions. Do this also
568              for the "once" (not-backup up) groups. */
569                 
570             BEGIN_OPCODE(KET):
571             BEGIN_OPCODE(KETRMIN):
572             BEGIN_OPCODE(KETRMAX):
573                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
574                 stack.currentFrame->args.subpatternStart = stack.currentFrame->locals.subpatternStart;
575                 stack.currentFrame->locals.subpatternStart = stack.currentFrame->previousFrame->args.subpatternStart;
576
577                 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
578                     md.endOffsetTop = stack.currentFrame->args.offsetTop;
579                     isMatch = true;
580                     RRETURN;
581                 }
582                 
583                 /* In all other cases except a conditional group we have to check the
584                  group number back at the start and if necessary complete handling an
585                  extraction by setting the offsets and bumping the high water mark. */
586                 
587                 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
588                 
589                 /* For extended extraction brackets (large number), we have to fish out
590                  the number from a dummy opcode at the start. */
591                 
592                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
593                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
594                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
595                 
596 #ifdef DEBUG
597                 printf("end bracket %d", stack.currentFrame->locals.number);
598                 printf("\n");
599 #endif
600                 
601                 /* Test for a numbered group. This includes groups called as a result
602                  of recursion. Note that whole-pattern recursion is coded as a recurse
603                  into group 0, so it won't be picked up here. Instead, we catch it when
604                  the OP_END is reached. */
605                 
606                 if (stack.currentFrame->locals.number > 0) {
607                     if (stack.currentFrame->locals.offset >= md.offsetMax)
608                         md.offsetOverflow = true;
609                     else {
610                         md.offsetVector[stack.currentFrame->locals.offset] =
611                         md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
612                         md.offsetVector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.startSubject;
613                         if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
614                             stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
615                     }
616                 }
617                 
618                 /* For a non-repeating ket, just continue at this level. This also
619                  happens for a repeating ket if no characters were matched in the group.
620                  This is the forcible breaking of infinite loops as implemented in Perl
621                  5.005. If there is an options reset, it will get obeyed in the normal
622                  course of events. */
623                 
624                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
625                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
626                     NEXT_OPCODE;
627                 }
628                 
629                 /* The repeating kets try the rest of the pattern or restart from the
630                  preceding bracket, in the appropriate order. */
631                 
632                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
633                     RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
634                     if (isMatch)
635                         RRETURN;
636                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
637                     if (isMatch)
638                         RRETURN;
639                 } else { /* OP_KETRMAX */
640                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
641                     if (isMatch)
642                         RRETURN;
643                     RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
644                     if (isMatch)
645                         RRETURN;
646                 }
647                 RRETURN;
648                 
649             /* Start of subject, or after internal newline if multiline. */
650                 
651             BEGIN_OPCODE(CIRC):
652                 if (stack.currentFrame->args.subjectPtr != md.startSubject && (!md.multiline || !isNewline(stack.currentFrame->args.subjectPtr[-1])))
653                     RRETURN_NO_MATCH;
654                 stack.currentFrame->args.instructionPtr++;
655                 NEXT_OPCODE;
656                 
657             /* End of subject, or before internal newline if multiline. */
658                 
659             BEGIN_OPCODE(DOLL):
660                 if (stack.currentFrame->args.subjectPtr < md.endSubject && (!md.multiline || !isNewline(*stack.currentFrame->args.subjectPtr)))
661                     RRETURN_NO_MATCH;
662                 stack.currentFrame->args.instructionPtr++;
663                 NEXT_OPCODE;
664                 
665             /* Word boundary assertions */
666                 
667             BEGIN_OPCODE(NOT_WORD_BOUNDARY):
668             BEGIN_OPCODE(WORD_BOUNDARY): {
669                 bool currentCharIsWordChar = false;
670                 bool previousCharIsWordChar = false;
671                 
672                 if (stack.currentFrame->args.subjectPtr > md.startSubject)
673                     previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
674                 if (stack.currentFrame->args.subjectPtr < md.endSubject)
675                     currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
676                 
677                 /* Now see if the situation is what we want */
678                 bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
679                 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
680                     RRETURN_NO_MATCH;
681                 NEXT_OPCODE;
682             }
683                 
684             /* Match a single character type; inline for speed */
685                 
686             BEGIN_OPCODE(NOT_NEWLINE):
687                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
688                     RRETURN_NO_MATCH;
689                 if (isNewline(*stack.currentFrame->args.subjectPtr++))
690                     RRETURN_NO_MATCH;
691                 stack.currentFrame->args.instructionPtr++;
692                 NEXT_OPCODE;
693
694             BEGIN_OPCODE(NOT_DIGIT):
695                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
696                     RRETURN_NO_MATCH;
697                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
698                     RRETURN_NO_MATCH;
699                 stack.currentFrame->args.instructionPtr++;
700                 NEXT_OPCODE;
701
702             BEGIN_OPCODE(DIGIT):
703                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
704                     RRETURN_NO_MATCH;
705                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
706                     RRETURN_NO_MATCH;
707                 stack.currentFrame->args.instructionPtr++;
708                 NEXT_OPCODE;
709
710             BEGIN_OPCODE(NOT_WHITESPACE):
711                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
712                     RRETURN_NO_MATCH;
713                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
714                     RRETURN_NO_MATCH;
715                 stack.currentFrame->args.instructionPtr++;
716                 NEXT_OPCODE;
717
718             BEGIN_OPCODE(WHITESPACE):
719                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
720                     RRETURN_NO_MATCH;
721                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
722                     RRETURN_NO_MATCH;
723                 stack.currentFrame->args.instructionPtr++;
724                 NEXT_OPCODE;
725                 
726             BEGIN_OPCODE(NOT_WORDCHAR):
727                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
728                     RRETURN_NO_MATCH;
729                 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
730                     RRETURN_NO_MATCH;
731                 stack.currentFrame->args.instructionPtr++;
732                 NEXT_OPCODE;
733                 
734             BEGIN_OPCODE(WORDCHAR):
735                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
736                     RRETURN_NO_MATCH;
737                 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
738                     RRETURN_NO_MATCH;
739                 stack.currentFrame->args.instructionPtr++;
740                 NEXT_OPCODE;
741                 
742             /* Match a back reference, possibly repeatedly. Look past the end of the
743              item to see if there is repeat information following. The code is similar
744              to that for character classes, but repeated for efficiency. Then obey
745              similar code to character type repeats - written out again for speed.
746              However, if the referenced string is the empty string, always treat
747              it as matched, any number of times (otherwise there could be infinite
748              loops). */
749                 
750             BEGIN_OPCODE(REF):
751                 stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1;               /* Doubled ref number */
752                 stack.currentFrame->args.instructionPtr += 3;                                 /* Advance past item */
753                 
754                 /* If the reference is unset, set the length to be longer than the amount
755                  of subject left; this ensures that every attempt at a match fails. We
756                  can't just fail here, because of the possibility of quantifiers with zero
757                  minima. */
758                 
759                 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
760                     stack.currentFrame->locals.length = 0;
761                 else
762                     stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
763                 
764                 /* Set up for repetition, or handle the non-repeated case */
765                 
766                 switch (*stack.currentFrame->args.instructionPtr) {
767                     case OP_CRSTAR:
768                     case OP_CRMINSTAR:
769                     case OP_CRPLUS:
770                     case OP_CRMINPLUS:
771                     case OP_CRQUERY:
772                     case OP_CRMINQUERY:
773                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
774                         break;
775                         
776                     case OP_CRRANGE:
777                     case OP_CRMINRANGE:
778                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
779                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
780                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
781                         if (stack.currentFrame->locals.max == 0)
782                             stack.currentFrame->locals.max = INT_MAX;
783                         stack.currentFrame->args.instructionPtr += 5;
784                         break;
785                     
786                     default:               /* No repeat follows */
787                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
788                             RRETURN_NO_MATCH;
789                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
790                         NEXT_OPCODE;
791                 }
792                 
793                 /* If the length of the reference is zero, just continue with the
794                  main loop. */
795                 
796                 if (stack.currentFrame->locals.length == 0)
797                     NEXT_OPCODE;
798                 
799                 /* First, ensure the minimum number of matches are present. */
800                 
801                 for (int i = 1; i <= min; i++) {
802                     if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
803                         RRETURN_NO_MATCH;
804                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
805                 }
806                 
807                 /* If min = max, continue at the same level without recursion.
808                  They are not both allowed to be zero. */
809                 
810                 if (min == stack.currentFrame->locals.max)
811                     NEXT_OPCODE;
812                 
813                 /* If minimizing, keep trying and advancing the pointer */
814                 
815                 if (minimize) {
816                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
817                         RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
818                         if (isMatch)
819                             RRETURN;
820                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
821                             RRETURN;
822                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
823                     }
824                     /* Control never reaches here */
825                 }
826                 
827                 /* If maximizing, find the longest string and work backwards */
828                 
829                 else {
830                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
831                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
832                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
833                             break;
834                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
835                     }
836                     while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
837                         RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
838                         if (isMatch)
839                             RRETURN;
840                         stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
841                     }
842                     RRETURN_NO_MATCH;
843                 }
844                 /* Control never reaches here */
845                 
846             /* Match a bit-mapped character class, possibly repeatedly. This op code is
847              used when all the characters in the class have values in the range 0-255,
848              and either the matching is caseful, or the characters are in the range
849              0-127 when UTF-8 processing is enabled. The only difference between
850              OP_CLASS and OP_NCLASS occurs when a data character outside the range is
851              encountered.
852              
853              First, look past the end of the item to see if there is repeat information
854              following. Then obey similar code to character type repeats - written out
855              again for speed. */
856                 
857             BEGIN_OPCODE(NCLASS):
858             BEGIN_OPCODE(CLASS):
859                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1;                /* Save for matching */
860                 stack.currentFrame->args.instructionPtr += 33;                     /* Advance past the item */
861                 
862                 switch (*stack.currentFrame->args.instructionPtr) {
863                     case OP_CRSTAR:
864                     case OP_CRMINSTAR:
865                     case OP_CRPLUS:
866                     case OP_CRMINPLUS:
867                     case OP_CRQUERY:
868                     case OP_CRMINQUERY:
869                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
870                         break;
871                         
872                     case OP_CRRANGE:
873                     case OP_CRMINRANGE:
874                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
875                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
876                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
877                         if (stack.currentFrame->locals.max == 0)
878                             stack.currentFrame->locals.max = INT_MAX;
879                         stack.currentFrame->args.instructionPtr += 5;
880                         break;
881                         
882                     default:               /* No repeat follows */
883                         min = stack.currentFrame->locals.max = 1;
884                         break;
885                 }
886                 
887                 /* First, ensure the minimum number of matches are present. */
888                 
889                 for (int i = 1; i <= min; i++) {
890                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
891                         RRETURN_NO_MATCH;
892                     int c = *stack.currentFrame->args.subjectPtr++;
893                     if (c > 255) {
894                         if (stack.currentFrame->locals.data[-1] == OP_CLASS)
895                             RRETURN_NO_MATCH;
896                     } else {
897                         if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
898                             RRETURN_NO_MATCH;
899                     }
900                 }
901                 
902                 /* If max == min we can continue with the main loop without the
903                  need to recurse. */
904                 
905                 if (min == stack.currentFrame->locals.max)
906                     NEXT_OPCODE;      
907                 
908                 /* If minimizing, keep testing the rest of the expression and advancing
909                  the pointer while it matches the class. */
910                 if (minimize) {
911                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
912                         RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
913                         if (isMatch)
914                             RRETURN;
915                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
916                             RRETURN;
917                         int c = *stack.currentFrame->args.subjectPtr++;
918                         if (c > 255) {
919                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
920                                 RRETURN;
921                         } else {
922                             if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
923                                 RRETURN;
924                         }
925                     }
926                     /* Control never reaches here */
927                 }
928                 /* If maximizing, find the longest possible run, then work backwards. */
929                 else {
930                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
931                     
932                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
933                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
934                             break;
935                         int c = *stack.currentFrame->args.subjectPtr;
936                         if (c > 255) {
937                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
938                                 break;
939                         } else {
940                             if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
941                                 break;
942                         }
943                         ++stack.currentFrame->args.subjectPtr;
944                     }
945                     for (;;) {
946                         RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
947                         if (isMatch)
948                             RRETURN;
949                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
950                             break;        /* Stop if tried at original pos */
951                     }
952                     
953                     RRETURN;
954                 }
955                 /* Control never reaches here */
956                 
957             /* Match an extended character class. */
958                 
959             BEGIN_OPCODE(XCLASS):
960                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE;                /* Save for matching */
961                 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);                      /* Advance past the item */
962                 
963                 switch (*stack.currentFrame->args.instructionPtr) {
964                     case OP_CRSTAR:
965                     case OP_CRMINSTAR:
966                     case OP_CRPLUS:
967                     case OP_CRMINPLUS:
968                     case OP_CRQUERY:
969                     case OP_CRMINQUERY:
970                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
971                         break;
972                         
973                     case OP_CRRANGE:
974                     case OP_CRMINRANGE:
975                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
976                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
977                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
978                         if (stack.currentFrame->locals.max == 0)
979                             stack.currentFrame->locals.max = INT_MAX;
980                         stack.currentFrame->args.instructionPtr += 5;
981                         break;
982                         
983                     default:               /* No repeat follows */
984                         min = stack.currentFrame->locals.max = 1;
985                 }
986                 
987                 /* First, ensure the minimum number of matches are present. */
988                 
989                 for (int i = 1; i <= min; i++) {
990                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
991                         RRETURN_NO_MATCH;
992                     int c = *stack.currentFrame->args.subjectPtr++;
993                     if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
994                         RRETURN_NO_MATCH;
995                 }
996                 
997                 /* If max == min we can continue with the main loop without the
998                  need to recurse. */
999                 
1000                 if (min == stack.currentFrame->locals.max)
1001                     NEXT_OPCODE;
1002                 
1003                 /* If minimizing, keep testing the rest of the expression and advancing
1004                  the pointer while it matches the class. */
1005                 
1006                 if (minimize) {
1007                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1008                         RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1009                         if (isMatch)
1010                             RRETURN;
1011                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1012                             RRETURN;
1013                         int c = *stack.currentFrame->args.subjectPtr++;
1014                         if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
1015                             RRETURN;
1016                     }
1017                     /* Control never reaches here */
1018                 }
1019                 
1020                 /* If maximizing, find the longest possible run, then work backwards. */
1021                 
1022                 else {
1023                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1024                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
1025                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1026                             break;
1027                         int c = *stack.currentFrame->args.subjectPtr;
1028                         if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
1029                             break;
1030                         ++stack.currentFrame->args.subjectPtr;
1031                     }
1032                     for(;;) {
1033                         RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1034                         if (isMatch)
1035                             RRETURN;
1036                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1037                             break;        /* Stop if tried at original pos */
1038                     }
1039                     RRETURN;
1040                 }
1041                 
1042                 /* Control never reaches here */
1043                 
1044             /* Match a single character, casefully */
1045                 
1046             BEGIN_OPCODE(CHAR):
1047                 stack.currentFrame->locals.length = 1;
1048                 stack.currentFrame->args.instructionPtr++;
1049                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1050                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1051                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1052                     RRETURN_NO_MATCH;
1053                 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
1054                     RRETURN_NO_MATCH;
1055                 NEXT_OPCODE;
1056                 
1057             /* Match a single character, caselessly */
1058                 
1059             BEGIN_OPCODE(CHAR_IGNORING_CASE): {
1060                 stack.currentFrame->locals.length = 1;
1061                 stack.currentFrame->args.instructionPtr++;
1062                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1063                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1064                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1065                     RRETURN_NO_MATCH;
1066                 int dc = *stack.currentFrame->args.subjectPtr++;
1067                 if (stack.currentFrame->locals.fc != dc && kjs_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
1068                     RRETURN_NO_MATCH;
1069                 NEXT_OPCODE;
1070             }
1071                 
1072             /* Match a single ASCII character. */
1073                 
1074             BEGIN_OPCODE(ASCII_CHAR):
1075                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1076                     RRETURN_NO_MATCH;
1077                 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
1078                     RRETURN_NO_MATCH;
1079                 ++stack.currentFrame->args.subjectPtr;
1080                 stack.currentFrame->args.instructionPtr += 2;
1081                 NEXT_OPCODE;
1082                 
1083             /* Match one of two cases of an ASCII letter. */
1084                 
1085             BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
1086                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1087                     RRETURN_NO_MATCH;
1088                 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
1089                     RRETURN_NO_MATCH;
1090                 ++stack.currentFrame->args.subjectPtr;
1091                 stack.currentFrame->args.instructionPtr += 2;
1092                 NEXT_OPCODE;
1093                 
1094             /* Match a single character repeatedly; different opcodes share code. */
1095                 
1096             BEGIN_OPCODE(EXACT):
1097                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1098                 minimize = false;
1099                 stack.currentFrame->args.instructionPtr += 3;
1100                 goto REPEATCHAR;
1101                 
1102             BEGIN_OPCODE(UPTO):
1103             BEGIN_OPCODE(MINUPTO):
1104                 min = 0;
1105                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1106                 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
1107                 stack.currentFrame->args.instructionPtr += 3;
1108                 goto REPEATCHAR;
1109                 
1110             BEGIN_OPCODE(STAR):
1111             BEGIN_OPCODE(MINSTAR):
1112             BEGIN_OPCODE(PLUS):
1113             BEGIN_OPCODE(MINPLUS):
1114             BEGIN_OPCODE(QUERY):
1115             BEGIN_OPCODE(MINQUERY):
1116                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
1117                 
1118                 /* Common code for all repeated single-character matches. We can give
1119                  up quickly if there are fewer than the minimum number of characters left in
1120                  the subject. */
1121                 
1122             REPEATCHAR:
1123                 
1124                 stack.currentFrame->locals.length = 1;
1125                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1126                 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
1127                     RRETURN_NO_MATCH;
1128                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1129                 
1130                 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1131                     int othercase = md.ignoreCase ? kjs_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
1132                     
1133                     for (int i = 1; i <= min; i++) {
1134                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1135                             RRETURN_NO_MATCH;
1136                         ++stack.currentFrame->args.subjectPtr;
1137                     }
1138                     
1139                     if (min == stack.currentFrame->locals.max)
1140                         NEXT_OPCODE;
1141                     
1142                     if (minimize) {
1143                         stack.currentFrame->locals.repeatOthercase = othercase;
1144                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1145                             RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1146                             if (isMatch)
1147                                 RRETURN;
1148                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1149                                 RRETURN;
1150                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
1151                                 RRETURN;
1152                             ++stack.currentFrame->args.subjectPtr;
1153                         }
1154                         /* Control never reaches here */
1155                     } else {
1156                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1157                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1158                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1159                                 break;
1160                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1161                                 break;
1162                             ++stack.currentFrame->args.subjectPtr;
1163                         }
1164                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1165                             RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1166                             if (isMatch)
1167                                 RRETURN;
1168                             --stack.currentFrame->args.subjectPtr;
1169                         }
1170                         RRETURN_NO_MATCH;
1171                     }
1172                     /* Control never reaches here */
1173                 } else {
1174                     /* No case on surrogate pairs, so no need to bother with "othercase". */
1175                     
1176                     for (int i = 1; i <= min; i++) {
1177                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1178                             RRETURN_NO_MATCH;
1179                         stack.currentFrame->args.subjectPtr += 2;
1180                     }
1181                     
1182                     if (min == stack.currentFrame->locals.max)
1183                         NEXT_OPCODE;
1184                     
1185                     if (minimize) {
1186                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1187                             RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1188                             if (isMatch)
1189                                 RRETURN;
1190                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1191                                 RRETURN;
1192                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1193                                 RRETURN;
1194                             stack.currentFrame->args.subjectPtr += 2;
1195                         }
1196                         /* Control never reaches here */
1197                     } else {
1198                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1199                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1200                             if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
1201                                 break;
1202                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1203                                 break;
1204                             stack.currentFrame->args.subjectPtr += 2;
1205                         }
1206                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1207                             RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1208                             if (isMatch)
1209                                 RRETURN;
1210                             stack.currentFrame->args.subjectPtr -= 2;
1211                         }
1212                         RRETURN_NO_MATCH;
1213                     }
1214                     /* Control never reaches here */
1215                 }
1216                 /* Control never reaches here */
1217                 
1218             /* Match a negated single one-byte character. */
1219                 
1220             BEGIN_OPCODE(NOT): {
1221                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1222                     RRETURN_NO_MATCH;
1223                 stack.currentFrame->args.instructionPtr++;
1224                 int c = *stack.currentFrame->args.subjectPtr++;
1225                 if (md.ignoreCase) {
1226                     if (c < 128)
1227                         c = toLowerCase(c);
1228                     if (toLowerCase(*stack.currentFrame->args.instructionPtr++) == c)
1229                         RRETURN_NO_MATCH;
1230                 } else {
1231                     if (*stack.currentFrame->args.instructionPtr++ == c)
1232                         RRETURN_NO_MATCH;
1233                 }
1234                 NEXT_OPCODE;
1235             }
1236                 
1237             /* Match a negated single one-byte character repeatedly. This is almost a
1238              repeat of the code for a repeated single character, but I haven't found a
1239              nice way of commoning these up that doesn't require a test of the
1240              positive/negative option for each character match. Maybe that wouldn't add
1241              very much to the time taken, but character matching *is* what this is all
1242              about... */
1243                 
1244             BEGIN_OPCODE(NOTEXACT):
1245                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1246                 minimize = false;
1247                 stack.currentFrame->args.instructionPtr += 3;
1248                 goto REPEATNOTCHAR;
1249                 
1250             BEGIN_OPCODE(NOTUPTO):
1251             BEGIN_OPCODE(NOTMINUPTO):
1252                 min = 0;
1253                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1254                 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
1255                 stack.currentFrame->args.instructionPtr += 3;
1256                 goto REPEATNOTCHAR;
1257                 
1258             BEGIN_OPCODE(NOTSTAR):
1259             BEGIN_OPCODE(NOTMINSTAR):
1260             BEGIN_OPCODE(NOTPLUS):
1261             BEGIN_OPCODE(NOTMINPLUS):
1262             BEGIN_OPCODE(NOTQUERY):
1263             BEGIN_OPCODE(NOTMINQUERY):
1264                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
1265                 
1266             /* Common code for all repeated single-byte matches. We can give up quickly
1267              if there are fewer than the minimum number of bytes left in the
1268              subject. */
1269                 
1270             REPEATNOTCHAR:
1271                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1272                     RRETURN_NO_MATCH;
1273                 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
1274                 
1275                 /* The code is duplicated for the caseless and caseful cases, for speed,
1276                  since matching characters is likely to be quite common. First, ensure the
1277                  minimum number of matches are present. If min = max, continue at the same
1278                  level without recursing. Otherwise, if minimizing, keep trying the rest of
1279                  the expression and advancing one matching character if failing, up to the
1280                  maximum. Alternatively, if maximizing, find the maximum number of
1281                  characters and work backwards. */
1282                 
1283                 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
1284                 
1285                 if (md.ignoreCase) {
1286                     if (stack.currentFrame->locals.fc < 128)
1287                         stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
1288                     
1289                     for (int i = 1; i <= min; i++) {
1290                         int d = *stack.currentFrame->args.subjectPtr++;
1291                         if (d < 128)
1292                             d = toLowerCase(d);
1293                         if (stack.currentFrame->locals.fc == d)
1294                             RRETURN_NO_MATCH;
1295                     }
1296                     
1297                     if (min == stack.currentFrame->locals.max)
1298                         NEXT_OPCODE;      
1299                     
1300                     if (minimize) {
1301                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1302                             RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1303                             if (isMatch)
1304                                 RRETURN;
1305                             int d = *stack.currentFrame->args.subjectPtr++;
1306                             if (d < 128)
1307                                 d = toLowerCase(d);
1308                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
1309                                 RRETURN;
1310                         }
1311                         /* Control never reaches here */
1312                     }
1313                     
1314                     /* Maximize case */
1315                     
1316                     else {
1317                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1318                         
1319                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1320                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1321                                 break;
1322                             int d = *stack.currentFrame->args.subjectPtr;
1323                             if (d < 128)
1324                                 d = toLowerCase(d);
1325                             if (stack.currentFrame->locals.fc == d)
1326                                 break;
1327                             ++stack.currentFrame->args.subjectPtr;
1328                         }
1329                         for (;;) {
1330                             RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1331                             if (isMatch)
1332                                 RRETURN;
1333                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1334                                 break;        /* Stop if tried at original pos */
1335                         }
1336                         
1337                         RRETURN;
1338                     }
1339                     /* Control never reaches here */
1340                 }
1341                 
1342                 /* Caseful comparisons */
1343                 
1344                 else {
1345                     for (int i = 1; i <= min; i++) {
1346                         int d = *stack.currentFrame->args.subjectPtr++;
1347                         if (stack.currentFrame->locals.fc == d)
1348                             RRETURN_NO_MATCH;
1349                     }
1350
1351                     if (min == stack.currentFrame->locals.max)
1352                         NEXT_OPCODE;
1353                     
1354                     if (minimize) {
1355                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1356                             RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1357                             if (isMatch)
1358                                 RRETURN;
1359                             int d = *stack.currentFrame->args.subjectPtr++;
1360                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
1361                                 RRETURN;
1362                         }
1363                         /* Control never reaches here */
1364                     }
1365                     
1366                     /* Maximize case */
1367                     
1368                     else {
1369                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1370                         
1371                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1372                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1373                                 break;
1374                             int d = *stack.currentFrame->args.subjectPtr;
1375                             if (stack.currentFrame->locals.fc == d)
1376                                 break;
1377                             ++stack.currentFrame->args.subjectPtr;
1378                         }
1379                         for (;;) {
1380                             RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1381                             if (isMatch)
1382                                 RRETURN;
1383                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1384                                 break;        /* Stop if tried at original pos */
1385                         }
1386
1387                         RRETURN;
1388                     }
1389                 }
1390                 /* Control never reaches here */
1391                 
1392             /* Match a single character type repeatedly; several different opcodes
1393              share code. This is very similar to the code for single characters, but we
1394              repeat it in the interests of efficiency. */
1395                 
1396             BEGIN_OPCODE(TYPEEXACT):
1397                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1398                 minimize = true;
1399                 stack.currentFrame->args.instructionPtr += 3;
1400                 goto REPEATTYPE;
1401                 
1402             BEGIN_OPCODE(TYPEUPTO):
1403             BEGIN_OPCODE(TYPEMINUPTO):
1404                 min = 0;
1405                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1406                 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
1407                 stack.currentFrame->args.instructionPtr += 3;
1408                 goto REPEATTYPE;
1409                 
1410             BEGIN_OPCODE(TYPESTAR):
1411             BEGIN_OPCODE(TYPEMINSTAR):
1412             BEGIN_OPCODE(TYPEPLUS):
1413             BEGIN_OPCODE(TYPEMINPLUS):
1414             BEGIN_OPCODE(TYPEQUERY):
1415             BEGIN_OPCODE(TYPEMINQUERY):
1416                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
1417                 
1418                 /* Common code for all repeated single character type matches. Note that
1419                  in UTF-8 mode, '.' matches a character of any length, but for the other
1420                  character types, the valid characters are all one-byte long. */
1421                 
1422             REPEATTYPE:
1423                 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++;      /* Code for the character type */
1424                 
1425                 /* First, ensure the minimum number of matches are present. Use inline
1426                  code for maximizing the speed, and do the type test once at the start
1427                  (i.e. keep it out of the loop). Also we can test that there are at least
1428                  the minimum number of characters before we start. */
1429                 
1430                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1431                     RRETURN_NO_MATCH;
1432                 if (min > 0) {
1433                     switch (stack.currentFrame->locals.ctype) {
1434                         case OP_NOT_NEWLINE:
1435                             for (int i = 1; i <= min; i++) {
1436                                 if (isNewline(*stack.currentFrame->args.subjectPtr))
1437                                     RRETURN_NO_MATCH;
1438                                 ++stack.currentFrame->args.subjectPtr;
1439                             }
1440                             break;
1441                             
1442                         case OP_NOT_DIGIT:
1443                             for (int i = 1; i <= min; i++) {
1444                                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1445                                     RRETURN_NO_MATCH;
1446                                 ++stack.currentFrame->args.subjectPtr;
1447                             }
1448                             break;
1449                             
1450                         case OP_DIGIT:
1451                             for (int i = 1; i <= min; i++) {
1452                                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1453                                     RRETURN_NO_MATCH;
1454                                 ++stack.currentFrame->args.subjectPtr;
1455                             }
1456                             break;
1457                             
1458                         case OP_NOT_WHITESPACE:
1459                             for (int i = 1; i <= min; i++) {
1460                                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
1461                                     RRETURN_NO_MATCH;
1462                                 ++stack.currentFrame->args.subjectPtr;
1463                             }
1464                             break;
1465                             
1466                         case OP_WHITESPACE:
1467                             for (int i = 1; i <= min; i++) {
1468                                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
1469                                     RRETURN_NO_MATCH;
1470                                 ++stack.currentFrame->args.subjectPtr;
1471                             }
1472                             break;
1473                             
1474                         case OP_NOT_WORDCHAR:
1475                             for (int i = 1; i <= min; i++) {
1476                                 if (isWordChar(*stack.currentFrame->args.subjectPtr))
1477                                     RRETURN_NO_MATCH;
1478                                 ++stack.currentFrame->args.subjectPtr;
1479                             }
1480                             break;
1481                             
1482                         case OP_WORDCHAR:
1483                             for (int i = 1; i <= min; i++) {
1484                                 if (!isWordChar(*stack.currentFrame->args.subjectPtr))
1485                                     RRETURN_NO_MATCH;
1486                                 ++stack.currentFrame->args.subjectPtr;
1487                             }
1488                             break;
1489                             
1490                         default:
1491                             ASSERT_NOT_REACHED();
1492                             return matchError(JSRegExpErrorInternal, stack);
1493                     }  /* End switch(stack.currentFrame->locals.ctype) */
1494                 }
1495                 
1496                 /* If min = max, continue at the same level without recursing */
1497                 
1498                 if (min == stack.currentFrame->locals.max)
1499                     NEXT_OPCODE;    
1500                 
1501                 /* If minimizing, we have to test the rest of the pattern before each
1502                  subsequent match. */
1503                 
1504                 if (minimize) {
1505                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1506                         RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1507                         if (isMatch)
1508                             RRETURN;
1509                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1510                             RRETURN;
1511                         
1512                         int c = *stack.currentFrame->args.subjectPtr++;
1513                         switch (stack.currentFrame->locals.ctype) {
1514                             case OP_NOT_NEWLINE:
1515                                 if (isNewline(c))
1516                                     RRETURN;
1517                                 break;
1518                                 
1519                             case OP_NOT_DIGIT:
1520                                 if (isASCIIDigit(c))
1521                                     RRETURN;
1522                                 break;
1523                                 
1524                             case OP_DIGIT:
1525                                 if (!isASCIIDigit(c))
1526                                     RRETURN;
1527                                 break;
1528                                 
1529                             case OP_NOT_WHITESPACE:
1530                                 if (isSpaceChar(c))
1531                                     RRETURN;
1532                                 break;
1533                                 
1534                             case OP_WHITESPACE:
1535                                 if (!isSpaceChar(c))
1536                                     RRETURN;
1537                                 break;
1538                                 
1539                             case OP_NOT_WORDCHAR:
1540                                 if (isWordChar(c))
1541                                     RRETURN;
1542                                 break;
1543                                 
1544                             case OP_WORDCHAR:
1545                                 if (!isWordChar(c))
1546                                     RRETURN;
1547                                 break;
1548                                 
1549                             default:
1550                                 ASSERT_NOT_REACHED();
1551                                 return matchError(JSRegExpErrorInternal, stack);
1552                         }
1553                     }
1554                     /* Control never reaches here */
1555                 }
1556                 
1557                 /* If maximizing it is worth using inline code for speed, doing the type
1558                  test once at the start (i.e. keep it out of the loop). */
1559                 
1560                 else {
1561                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;  /* Remember where we started */
1562                     
1563                     switch (stack.currentFrame->locals.ctype) {
1564                         case OP_NOT_NEWLINE:
1565                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1566                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr))
1567                                     break;
1568                                 stack.currentFrame->args.subjectPtr++;
1569                             }
1570                             break;
1571                             
1572                         case OP_NOT_DIGIT:
1573                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1574                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1575                                     break;
1576                                 int c = *stack.currentFrame->args.subjectPtr;
1577                                 if (isASCIIDigit(c))
1578                                     break;
1579                                 ++stack.currentFrame->args.subjectPtr;
1580                             }
1581                             break;
1582                             
1583                         case OP_DIGIT:
1584                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1585                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1586                                     break;
1587                                 int c = *stack.currentFrame->args.subjectPtr;
1588                                 if (!isASCIIDigit(c))
1589                                     break;
1590                                 ++stack.currentFrame->args.subjectPtr;
1591                             }
1592                             break;
1593                             
1594                         case OP_NOT_WHITESPACE:
1595                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1596                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1597                                     break;
1598                                 int c = *stack.currentFrame->args.subjectPtr;
1599                                 if (isSpaceChar(c))
1600                                     break;
1601                                 ++stack.currentFrame->args.subjectPtr;
1602                             }
1603                             break;
1604                             
1605                         case OP_WHITESPACE:
1606                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1607                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1608                                     break;
1609                                 int c = *stack.currentFrame->args.subjectPtr;
1610                                 if (!isSpaceChar(c))
1611                                     break;
1612                                 ++stack.currentFrame->args.subjectPtr;
1613                             }
1614                             break;
1615                             
1616                         case OP_NOT_WORDCHAR:
1617                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1618                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1619                                     break;
1620                                 int c = *stack.currentFrame->args.subjectPtr;
1621                                 if (isWordChar(c))
1622                                     break;
1623                                 ++stack.currentFrame->args.subjectPtr;
1624                             }
1625                             break;
1626                             
1627                         case OP_WORDCHAR:
1628                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1629                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1630                                     break;
1631                                 int c = *stack.currentFrame->args.subjectPtr;
1632                                 if (!isWordChar(c))
1633                                     break;
1634                                 ++stack.currentFrame->args.subjectPtr;
1635                             }
1636                             break;
1637                             
1638                         default:
1639                             ASSERT_NOT_REACHED();
1640                             return matchError(JSRegExpErrorInternal, stack);
1641                     }
1642                     
1643                     /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
1644                     
1645                     for (;;) {
1646                         RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1647                         if (isMatch)
1648                             RRETURN;
1649                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1650                             break;        /* Stop if tried at original pos */
1651                     }
1652                     
1653                     /* Get here if we can't make it match with any permitted repetitions */
1654                     
1655                     RRETURN;
1656                 }
1657                 /* Control never reaches here */
1658                 
1659             BEGIN_OPCODE(CRMINPLUS):
1660             BEGIN_OPCODE(CRMINQUERY):
1661             BEGIN_OPCODE(CRMINRANGE):
1662             BEGIN_OPCODE(CRMINSTAR):
1663             BEGIN_OPCODE(CRPLUS):
1664             BEGIN_OPCODE(CRQUERY):
1665             BEGIN_OPCODE(CRRANGE):
1666             BEGIN_OPCODE(CRSTAR):
1667                 ASSERT_NOT_REACHED();
1668                 return matchError(JSRegExpErrorInternal, stack);
1669                 
1670 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1671             CAPTURING_BRACKET:
1672 #else
1673             default:
1674 #endif
1675                 /* Opening capturing bracket. If there is space in the offset vector, save
1676                  the current subject position in the working slot at the top of the vector. We
1677                  mustn't change the current values of the data slot, because they may be set
1678                  from a previous iteration of this group, and be referred to by a reference
1679                  inside the group.
1680                  
1681                  If the bracket fails to match, we need to restore this value and also the
1682                  values of the final offsets, in case they were set by a previous iteration of
1683                  the same bracket.
1684                  
1685                  If there isn't enough space in the offset vector, treat this as if it were a
1686                  non-capturing bracket. Don't worry about setting the flag for the error case
1687                  here; that is handled in the code for KET. */
1688                 
1689                 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1690                 
1691                 stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
1692                 
1693                 /* For extended extraction brackets (large number), we have to fish out the
1694                  number from a dummy opcode at the start. */
1695                 
1696                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1697                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 2 + LINK_SIZE);
1698                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
1699                 
1700 #ifdef DEBUG
1701                 printf("start bracket %d subject=", stack.currentFrame->locals.number);
1702                 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
1703                 printf("\n");
1704 #endif
1705                 
1706                 if (stack.currentFrame->locals.offset < md.offsetMax) {
1707                     stack.currentFrame->locals.saveOffset1 = md.offsetVector[stack.currentFrame->locals.offset];
1708                     stack.currentFrame->locals.saveOffset2 = md.offsetVector[stack.currentFrame->locals.offset + 1];
1709                     stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
1710                     
1711                     DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.saveOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.saveOffset3));
1712                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
1713                     
1714                     do {
1715                         RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
1716                         if (isMatch)
1717                             RRETURN;
1718                         stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
1719                     } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
1720                     
1721                     DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
1722                     
1723                     md.offsetVector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.saveOffset1;
1724                     md.offsetVector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.saveOffset2;
1725                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.saveOffset3;
1726                     
1727                     RRETURN;
1728                 }
1729                 
1730                 /* Insufficient room for saving captured contents */
1731                 
1732                 goto NON_CAPTURING_BRACKET;
1733         }
1734         
1735         /* Do not stick any code in here without much thought; it is assumed
1736          that "continue" in the code above comes out to here to repeat the main
1737          loop. */
1738         
1739     } /* End of main loop */
1740     
1741     ASSERT_NOT_REACHED();
1742     
1743 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1744     
1745 RRETURN_SWITCH:
1746     switch (stack.currentFrame->returnLocation) {
1747         case 0: goto RETURN;
1748         case 1: goto RRETURN_1;
1749         case 2: goto RRETURN_2;
1750         case 6: goto RRETURN_6;
1751         case 7: goto RRETURN_7;
1752         case 14: goto RRETURN_14;
1753         case 15: goto RRETURN_15;
1754         case 16: goto RRETURN_16;
1755         case 17: goto RRETURN_17;
1756         case 18: goto RRETURN_18;
1757         case 19: goto RRETURN_19;
1758         case 20: goto RRETURN_20;
1759         case 21: goto RRETURN_21;
1760         case 22: goto RRETURN_22;
1761         case 24: goto RRETURN_24;
1762         case 26: goto RRETURN_26;
1763         case 27: goto RRETURN_27;
1764         case 28: goto RRETURN_28;
1765         case 29: goto RRETURN_29;
1766         case 30: goto RRETURN_30;
1767         case 31: goto RRETURN_31;
1768         case 38: goto RRETURN_38;
1769         case 40: goto RRETURN_40;
1770         case 42: goto RRETURN_42;
1771         case 44: goto RRETURN_44;
1772         case 48: goto RRETURN_48;
1773         case 52: goto RRETURN_52;
1774     }
1775     
1776     ASSERT_NOT_REACHED();
1777     return matchError(JSRegExpErrorInternal, stack);
1778     
1779 #endif
1780     
1781 RETURN:
1782     ASSERT(isMatch == MATCH_MATCH || isMatch == MATCH_NOMATCH);
1783     return isMatch;
1784 }
1785
1786
1787 /*************************************************
1788 *         Execute a Regular Expression           *
1789 *************************************************/
1790
1791 /* This function applies a compiled re to a subject string and picks out
1792 portions of the string if it matches. Two elements in the vector are set for
1793 each substring: the offsets to the start and end of the substring.
1794
1795 Arguments:
1796   re              points to the compiled expression
1797   extra_data      points to extra data or is NULL
1798   subject         points to the subject string
1799   length          length of subject string (may contain binary zeros)
1800   start_offset    where to start in the subject string
1801   options         option bits
1802   offsets         points to a vector of ints to be filled in with offsets
1803   offsetcount     the number of elements in the vector
1804
1805 Returns:          > 0 => success; value is the number of elements filled in
1806                   = 0 => success, but offsets is not big enough
1807                    -1 => failed to match
1808                  < -1 => some kind of unexpected problem
1809 */
1810
1811 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
1812 {
1813     // If first_byte is set, try scanning to the first instance of that byte
1814     // no need to try and match against any earlier part of the subject string.
1815     if (first_byte >= 0) {
1816         UChar first_char = first_byte;
1817         if (first_byte_caseless)
1818             while (subjectPtr < endSubject) {
1819                 int c = *subjectPtr;
1820                 if (c > 127)
1821                     break;
1822                 if (toLowerCase(c) == first_char)
1823                     break;
1824                 subjectPtr++;
1825             }
1826         else {
1827             while (subjectPtr < endSubject && *subjectPtr != first_char)
1828                 subjectPtr++;
1829         }
1830     } else if (useMultiLineFirstCharOptimization) {
1831         /* Or to just after \n for a multiline match if possible */
1832         // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
1833         if (subjectPtr > originalSubjectStart) {
1834             while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
1835                 subjectPtr++;
1836         }
1837     }
1838 }
1839
1840 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByte, const UChar*& reqBytePtr)
1841 {
1842     /* If req_byte is set, we know that that character must appear in the subject
1843      for the match to succeed. If the first character is set, req_byte must be
1844      later in the subject; otherwise the test starts at the match point. This
1845      optimization can save a huge amount of backtracking in patterns with nested
1846      unlimited repeats that aren't going to match. Writing separate code for
1847      cased/caseless versions makes it go faster, as does using an autoincrement
1848      and backing off on a match.
1849      
1850      HOWEVER: when the subject string is very, very long, searching to its end can
1851      take a long time, and give bad performance on quite ordinary patterns. This
1852      showed up when somebody was matching /^C/ on a 32-megabyte string... so we
1853      don't do this when the string is sufficiently long.
1854     */
1855
1856     if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
1857         const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
1858
1859         /* We don't need to repeat the search if we haven't yet reached the
1860          place we found it at last time. */
1861
1862         if (p > reqBytePtr) {
1863             if (req_byte_caseless) {
1864                 while (p < endSubject) {
1865                     int pp = *p++;
1866                     if (pp == req_byte || pp == req_byte2) {
1867                         p--;
1868                         break;
1869                     }
1870                 }
1871             } else {
1872                 while (p < endSubject) {
1873                     if (*p++ == req_byte) {
1874                         p--;
1875                         break;
1876                     }
1877                 }
1878             }
1879
1880             /* If we can't find the required character, break the matching loop */
1881
1882             if (p >= endSubject)
1883                 return true;
1884
1885             /* If we have found the required character, save the point where we
1886              found it, so that we don't search again next time round the loop if
1887              the start hasn't passed this character yet. */
1888
1889             reqBytePtr = p;
1890         }
1891     }
1892     return false;
1893 }
1894
1895 int jsRegExpExecute(const JSRegExp* re,
1896                     const UChar* subject, int length, int start_offset, int* offsets,
1897                     int offsetcount)
1898 {
1899     ASSERT(re);
1900     ASSERT(subject);
1901     ASSERT(offsetcount >= 0);
1902     ASSERT(offsets || offsetcount == 0);
1903     
1904     MatchData matchBlock;
1905     matchBlock.startSubject = subject;
1906     matchBlock.endSubject = matchBlock.startSubject + length;
1907     const UChar* endSubject = matchBlock.endSubject;
1908     
1909     matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
1910     matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
1911     
1912     /* If the expression has got more back references than the offsets supplied can
1913      hold, we get a temporary chunk of working store to use during the matching.
1914      Otherwise, we can use the vector supplied, rounding down its size to a multiple
1915      of 3. */
1916     
1917     int ocount = offsetcount - (offsetcount % 3);
1918     
1919     // FIXME: This is lame that we have to second-guess our caller here.
1920     // The API should change to either fail-hard when we don't have enough offset space
1921     // or that we shouldn't ask our callers to pre-allocate in the first place.
1922     bool using_temporary_offsets = false;
1923     if (re->top_backref > 0 && re->top_backref >= ocount/3) {
1924         ocount = re->top_backref * 3 + 3;
1925         matchBlock.offsetVector = new int[ocount];
1926         if (!matchBlock.offsetVector)
1927             return JSRegExpErrorNoMemory;
1928         using_temporary_offsets = true;
1929     } else
1930         matchBlock.offsetVector = offsets;
1931     
1932     matchBlock.offsetEnd = ocount;
1933     matchBlock.offsetMax = (2*ocount)/3;
1934     matchBlock.offsetOverflow = false;
1935     
1936     /* Compute the minimum number of offsets that we need to reset each time. Doing
1937      this makes a huge difference to execution time when there aren't many brackets
1938      in the pattern. */
1939     
1940     int resetcount = 2 + re->top_bracket * 2;
1941     if (resetcount > offsetcount)
1942         resetcount = ocount;
1943     
1944     /* Reset the working variable associated with each extraction. These should
1945      never be used unless previously set, but they get saved and restored, and so we
1946      initialize them to avoid reading uninitialized locations. */
1947     
1948     if (matchBlock.offsetVector) {
1949         int* iptr = matchBlock.offsetVector + ocount;
1950         int* iend = iptr - resetcount/2 + 1;
1951         while (--iptr >= iend)
1952             *iptr = -1;
1953     }
1954     
1955     /* Set up the first character to match, if available. The first_byte value is
1956      never set for an anchored regular expression, but the anchoring may be forced
1957      at run time, so we have to test for anchoring. The first char may be unset for
1958      an unanchored pattern, of course. If there's no first char and the pattern was
1959      studied, there may be a bitmap of possible first characters. */
1960     
1961     bool first_byte_caseless = false;
1962     int first_byte = -1;
1963     if (re->options & UseFirstByteOptimizationOption) {
1964         first_byte = re->first_byte & 255;
1965         if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
1966             first_byte = toLowerCase(first_byte);
1967     }
1968     
1969     /* For anchored or unanchored matches, there may be a "last known required
1970      character" set. */
1971     
1972     bool req_byte_caseless = false;
1973     int req_byte = -1;
1974     int req_byte2 = -1;
1975     if (re->options & UseRequiredByteOptimizationOption) {
1976         req_byte = re->req_byte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
1977         req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
1978         req_byte2 = flipCase(req_byte);
1979     }
1980     
1981     /* Loop for handling unanchored repeated matching attempts; for anchored regexs
1982      the loop runs just once. */
1983     
1984     const UChar* startMatch = subject + start_offset;
1985     const UChar* reqBytePtr = startMatch - 1;
1986     bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
1987     
1988     do {
1989         /* Reset the maximum number of extractions we might see. */
1990         if (matchBlock.offsetVector) {
1991             int* iptr = matchBlock.offsetVector;
1992             int* iend = iptr + resetcount;
1993             while (iptr < iend)
1994                 *iptr++ = -1;
1995         }
1996         
1997         tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
1998         if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_byte2, req_byte_caseless, first_byte >= 0, reqBytePtr))
1999             break;
2000                 
2001         /* When a match occurs, substrings will be set for all internal extractions;
2002          we just need to set up the whole thing as substring 0 before returning. If
2003          there were too many extractions, set the return code to zero. In the case
2004          where we had to get some local store to hold offsets for backreferences, copy
2005          those back references that we can. In this case there need not be overflow
2006          if certain parts of the pattern were not used. */
2007         
2008         /* The code starts after the JSRegExp block and the capture name table. */
2009         const unsigned char* start_code = (const unsigned char*)(re + 1);
2010         
2011         int returnCode = match(startMatch, start_code, 2, matchBlock);
2012         
2013         /* When the result is no match, advance the pointer to the next character
2014          and continue. */
2015         
2016         if (returnCode == MATCH_NOMATCH) {
2017             startMatch++;
2018             continue;
2019         }
2020         
2021         if (returnCode != MATCH_MATCH) {
2022             DPRINTF((">>>> error: returning %d\n", rc));
2023             return returnCode;
2024         }
2025         
2026         /* We have a match! Copy the offset information from temporary store if
2027          necessary */
2028         
2029         if (using_temporary_offsets) {
2030             if (offsetcount >= 4) {
2031                 memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount - 2) * sizeof(int));
2032                 DPRINTF(("Copied offsets from temporary memory\n"));
2033             }
2034             if (matchBlock.endOffsetTop > offsetcount)
2035                 matchBlock.offsetOverflow = true;
2036             
2037             DPRINTF(("Freeing temporary memory\n"));
2038             delete [] matchBlock.offsetVector;
2039         }
2040         
2041         returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
2042         
2043         if (offsetcount < 2)
2044             returnCode = 0;
2045         else {
2046             offsets[0] = startMatch - matchBlock.startSubject;
2047             offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
2048         }
2049         
2050         DPRINTF((">>>> returning %d\n", rc));
2051         return returnCode;
2052     } while (startMatch <= endSubject);
2053     
2054     if (using_temporary_offsets) {
2055         DPRINTF(("Freeing temporary memory\n"));
2056         delete [] matchBlock.offsetVector;
2057     }
2058     
2059     DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2060     return JSRegExpErrorNoMatch;
2061 }