2007-11-26 Eric Seidel <eric@webkit.org>
[WebKit-https.git] / JavaScriptCore / pcre / pcre_exec.cpp
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7                  Originally written by Philip Hazel
8            Copyright (c) 1997-2006 University of Cambridge
9     Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10     Copyright (C) 2007 Eric Seidel <eric@webkit.org>
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
44
45 #include "config.h"
46
47 #include "pcre_internal.h"
48
49 #include <wtf/ASCIICType.h>
50 #include <wtf/Vector.h>
51
52 using namespace WTF;
53
/* With GCC, the simulated recursion in match() returns through computed
gotos (label addresses). The computed-goto variant of the opcode dispatch
loop is left disabled. */
#ifdef __GNUC__
#define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
//#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
#endif

/* Avoid warnings on Windows. This file uses min and max as ordinary
identifiers, so any macros with those names must be removed first. */
#undef min
#undef max

/* Type stored in MatchFrame::returnLocation: a plain number when returns are
dispatched by number, or a label address when computed gotos are in use. */
#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
typedef int ReturnLocation;
#else
typedef void* ReturnLocation;
#endif
68
69 struct MatchFrame {
70     ReturnLocation returnLocation;
71     struct MatchFrame* previousFrame;
72     
73     /* Function arguments that may change */
74     struct {
75         const UChar* subjectPtr;
76         const uschar* instructionPtr;
77         int offset_top;
78         const UChar* subpatternStart;
79     } args;
80     
81     
82     /* PCRE uses "fake" recursion built off of gotos, thus
83      stack-based local variables are not safe to use.  Instead we have to
84      store local variables on the current MatchFrame. */
85     struct {
86         const uschar* data;
87         const uschar* startOfRepeatingBracket;
88         const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
89         const uschar* instructionPtrAtStartOfOnce;
90         
91         int repeat_othercase;
92         
93         int ctype;
94         int fc;
95         int fi;
96         int length;
97         int max;
98         int number;
99         int offset;
100         int save_offset1;
101         int save_offset2;
102         int save_offset3;
103         
104         const UChar* subpatternStart;
105     } locals;
106 };
107
108 /* Structure for passing "static" information around between the functions
109 doing traditional NFA matching, so that they are thread-safe. */
110
111 struct MatchData {
112   int*   offset_vector;         /* Offset vector */
113   int    offset_end;            /* One past the end */
114   int    offset_max;            /* The maximum usable for return data */
115   bool   offset_overflow;       /* Set if too many extractions */
116   UChar*  start_subject;         /* Start of the subject string */
117   UChar*  end_subject;           /* End of the subject string */
118   const UChar*  end_match_ptr;         /* Subject position at end match */
119   int    end_offset_top;        /* Highwater mark at end of match */
120   bool   multiline;
121   bool   ignoreCase;
122 };
123
/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative.
NOTE(review): matchError() in this file is handed a JSRegExpError* code, so
the PCRE_ERROR_xxx naming above may be out of date - confirm. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Min and max values for the common repeats; for the maxima, 0 => infinity.
Each table has six entries; presumably they are indexed by the repeat opcode
relative to the first repeat opcode (star, lazy star, plus, lazy plus, query,
lazy query) - TODO confirm against the opcode handlers in match(). */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
134
135
136
#ifdef DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested.

Arguments:
  p           points to characters
  length      number to print
  is_subject  true if printing from within md.start_subject; length is then
              clamped to the distance from p to md.end_subject
  md          matching data block; only end_subject is read, and only when
              is_subject is true

Characters accepted by isprint() are written as themselves, other values
below 256 as \xNN, and everything else as \x{N...}.
*/

static void pchars(const UChar* p, int length, bool is_subject, const MatchData& md)
{
    if (is_subject && length > md.end_subject - p)
        length = md.end_subject - p;
    while (length-- > 0) {
        int c = *p++;
        /* isprint() is only defined for arguments representable as unsigned
           char (or EOF), so it must never see a value above 0xFF.
           Assumes UChar is an unsigned type, so c cannot be negative. */
        if (c < 256 && isprint(c))
            printf("%c", c);
        else if (c < 256)
            printf("\\x%02x", c);
        else
            printf("\\x{%x}", c);
    }
}
#endif
167
168
169
170 /*************************************************
171 *          Match a back-reference                *
172 *************************************************/
173
174 /* If a back reference hasn't been set, the length that is passed is greater
175 than the number of characters left in the string, so the match fails.
176
177 Arguments:
178   offset      index into the offset vector
179   subjectPtr        points into the subject
180   length      length to be matched
181   md          points to match data block
182
183 Returns:      true if matched
184 */
185
186 static bool match_ref(int offset, const UChar* subjectPtr, int length, const MatchData& md)
187 {
188     UChar* p = md.start_subject + md.offset_vector[offset];
189     
190 #ifdef DEBUG
191     if (subjectPtr >= md.end_subject)
192         printf("matching subject <null>");
193     else {
194         printf("matching subject ");
195         pchars(subjectPtr, length, true, md);
196     }
197     printf(" against backref ");
198     pchars(p, length, false, md);
199     printf("\n");
200 #endif
201     
202     /* Always fail if not enough characters left */
203     
204     if (length > md.end_subject - subjectPtr)
205         return false;
206     
207     /* Separate the caselesss case for speed */
208     
209     if (md.ignoreCase) {
210         while (length-- > 0) {
211             UChar c = *p++;
212             int othercase = _pcre_ucp_othercase(c);
213             UChar d = *subjectPtr++;
214             if (c != d && othercase != d)
215                 return false;
216         }
217     }
218     else {
219         while (length-- > 0)
220             if (*p++ != *subjectPtr++)
221                 return false;
222     }
223     
224     return true;
225 }
226
/* Helper macros implementing the simulated recursion inside match(). They
all expand in the context of match() and rely on its local MatchStack named
"stack", its "is_match" variable and its RECURSE label. */

#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION

/* Use numbered labels and switch statement at the bottom of the match function. */

/* RMATCH_WHERE(num) yields the value stored as a frame's returnLocation;
RRETURN_LABEL is the target that RRETURN jumps to. */
#define RMATCH_WHERE(num) num
#define RRETURN_LABEL RRETURN_SWITCH

#else

/* Use GCC's computed goto extension. */

/* For one test case this is more than 40% faster than the switch statement.
We could avoid the use of the num argument entirely by using local labels,
but using it for the GCC case as well as the non-GCC case allows us to share
a bit more code and notice if we use conflicting numbers.*/

/* Here the return location is the address of the RRETURN_<num> label, and
RRETURN jumps straight to the address saved in the current frame. */
#define RMATCH_WHERE(num) &&RRETURN_##num
#define RRETURN_LABEL *stack.currentFrame->returnLocation

#endif

/* Abort the whole match with JSRegExpErrorRecursionLimit once the frame
stack has reached MATCH_LIMIT_RECURSION entries. Note that this expands to a
bare "if" statement that already ends in a semicolon. */
#define CHECK_RECURSION_LIMIT \
    if (stack.size >= MATCH_LIMIT_RECURSION) \
        return matchError(JSRegExpErrorRecursionLimit, stack);

/* Check the recursion limit, jump to RECURSE to run the frame that was just
pushed, and define the RRETURN_<num> label at which execution resumes once
that frame returns. */
#define RECURSE_WITH_RETURN_NUMBER(num) \
    CHECK_RECURSION_LIMIT \
    goto RECURSE;\
    RRETURN_##num:

/* "Call" match() recursively: push a frame with instruction pointer ra and
subpattern start rb, run it, then pop it again. The outcome is left in
is_match. Each use needs its own num. */
#define RECURSIVE_MATCH(num, ra, rb) \
{\
    stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
    RECURSE_WITH_RETURN_NUMBER(num) \
    stack.popCurrentFrame(); \
}

/* Same as RECURSIVE_MATCH, but additionally calls startNewGroup() on the
freshly pushed frame before running it. */
#define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
{\
    stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
    startNewGroup(stack.currentFrame); \
    RECURSE_WITH_RETURN_NUMBER(num) \
    stack.popCurrentFrame(); \
}

/* Return from the current simulated call, leaving is_match as it is. */
#define RRETURN goto RRETURN_LABEL

/* Return from the current simulated call, reporting failure. */
#define RRETURN_NO_MATCH \
  {\
    is_match = false;\
    RRETURN;\
  }
279
280 /*************************************************
281 *         Match from current position            *
282 *************************************************/
283
284 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
285 in the subject string, while subjectPtrb holds the value of subjectPtr at the start of the
286 last bracketed group - used for breaking infinite loops matching zero-length
287 strings. This function is called recursively in many circumstances. Whenever it
288 returns a negative (error) response, the outer incarnation must also return the
289 same response.
290
291 Arguments:
292    subjectPtr        pointer in subject
293    instructionPtr       position in code
294    offset_top  current top pointer
295    md          pointer to "static" info for the match
296
297 Returns:       MATCH_MATCH if matched            )  these values are >= 0
298                MATCH_NOMATCH if failed to match  )
299                a negative PCRE_ERROR_xxx value if aborted by an error condition
300                  (e.g. stopped by repeated call or recursion limit)
301 */
302
303 static const unsigned FRAMES_ON_STACK = 16;
304
305 struct MatchStack {
306     MatchStack()
307         : framesEnd(frames + FRAMES_ON_STACK)
308         , currentFrame(frames)
309         , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
310     {
311         ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
312     }
313     
314     /* The value 16 here is large enough that most regular expressions don't require
315      any calls to pcre_stack_malloc, yet the amount of stack used for the array is
316      modest enough that we don't run out of stack. */
317     MatchFrame frames[FRAMES_ON_STACK];
318     MatchFrame* framesEnd;
319     MatchFrame* currentFrame;
320     unsigned size;
321     
322     inline bool canUseStackBufferForNextFrame()
323     {
324         return size < FRAMES_ON_STACK;
325     }
326     
327     inline MatchFrame* allocateNextFrame()
328     {
329         if (canUseStackBufferForNextFrame())
330             return currentFrame + 1;
331         return new MatchFrame;
332     }
333     
334     inline void pushNewFrame(const uschar* instructionPtr, const UChar* subpatternStart, ReturnLocation returnLocation)
335     {
336         MatchFrame* newframe = allocateNextFrame();
337         newframe->previousFrame = currentFrame;
338
339         newframe->args.subjectPtr = currentFrame->args.subjectPtr;
340         newframe->args.offset_top = currentFrame->args.offset_top;
341         newframe->args.instructionPtr = instructionPtr;
342         newframe->args.subpatternStart = subpatternStart;
343         newframe->returnLocation = returnLocation;
344         size++;
345
346         currentFrame = newframe;
347     }
348     
349     inline void popCurrentFrame()
350     {
351         MatchFrame* oldFrame = currentFrame;
352         currentFrame = currentFrame->previousFrame;
353         if (size > FRAMES_ON_STACK)
354             delete oldFrame;
355         size--;
356     }
357
358     void popAllFrames()
359     {
360         while (size)
361             popCurrentFrame();
362     }
363 };
364
365 static int matchError(int errorCode, MatchStack& stack)
366 {
367     stack.popAllFrames();
368     return errorCode;
369 }
370
371 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
372  if there are extra bytes. This is called when we know we are in UTF-8 mode. */
373
374 static inline void getUTF8CharAndIncrementLength(int& c, const uschar* subjectPtr, int& len)
375 {
376     c = *subjectPtr;
377     if ((c & 0xc0) == 0xc0) {
378         int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
379         int gcss = 6 * gcaa;
380         c = (c & _pcre_utf8_table3[gcaa]) << gcss;
381         for (int gcii = 1; gcii <= gcaa; gcii++) {
382             gcss -= 6;
383             c |= (subjectPtr[gcii] & 0x3f) << gcss;
384         }
385         len += gcaa;
386     }
387 }
388
389 static inline void startNewGroup(MatchFrame* currentFrame)
390 {
391     /* At the start of a bracketed group, add the current subject pointer to the
392      stack of such pointers, to be re-instated at the end of the group when we hit
393      the closing ket. When match() is called in other circumstances, we don't add to
394      this stack. */
395     
396     currentFrame->locals.subpatternStart = currentFrame->args.subpatternStart;
397 }
398
399 static int match(UChar* subjectPtr, const uschar* instructionPtr, int offset_top, MatchData& md)
400 {
401     int is_match = false;
402     int i;
403     int c;
404     
405     bool cur_is_word;
406     bool prev_is_word;
407     int min;
408     bool minimize = false; /* Initialization not really needed, but some compilers think so. */
409     
410     MatchStack stack;
411
412     /* The opcode jump table. */
413 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
414 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
415     static void* opcode_jump_table[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
416 #undef EMIT_JUMP_TABLE_ENTRY
417 #endif
418     
419     /* One-time setup of the opcode jump table. */
420 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
421     i = 255;
422     while (!opcode_jump_table[i])
423         opcode_jump_table[i--] = &&CAPTURING_BRACKET;
424 #endif
425     
426 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
427     stack.currentFrame->returnLocation = &&RETURN;
428 #else
429     stack.currentFrame->returnLocation = 0;
430 #endif
431     
432     stack.currentFrame->args.subjectPtr = subjectPtr;
433     stack.currentFrame->args.instructionPtr = instructionPtr;
434     stack.currentFrame->args.offset_top = offset_top;
435     stack.currentFrame->args.subpatternStart = 0;
436     startNewGroup(stack.currentFrame);
437     
438     /* This is where control jumps back to to effect "recursion" */
439     
440 RECURSE:
441
442     /* Now start processing the operations. */
443     
444 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
445     while (true)
446 #endif
447     {
448         
449 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
450 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
451 #define NEXT_OPCODE goto *opcode_jump_table[*stack.currentFrame->args.instructionPtr]
452 #else
453 #define BEGIN_OPCODE(opcode) case OP_##opcode
454 #define NEXT_OPCODE continue
455 #endif
456         
457 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
458         NEXT_OPCODE;
459 #else
460         switch (*stack.currentFrame->args.instructionPtr)
461 #endif
462         {
463                 /* Non-capturing bracket: optimized */
464                 
465                 BEGIN_OPCODE(BRA):
466             NON_CAPTURING_BRACKET:
467                 DPRINTF(("start bracket 0\n"));
468                 do {
469                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
470                     if (is_match)
471                         RRETURN;
472                     stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
473                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
474                 DPRINTF(("bracket 0 failed\n"));
475                 RRETURN;
476                 
477                 /* Skip over large extraction number data if encountered. */
478                 
479                 BEGIN_OPCODE(BRANUMBER):
480                 stack.currentFrame->args.instructionPtr += 3;
481                 NEXT_OPCODE;
482                 
483                 /* End of the pattern. */
484                 
485                 BEGIN_OPCODE(END):
486                 md.end_match_ptr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */
487                 md.end_offset_top = stack.currentFrame->args.offset_top;   /* and how many extracts were taken */
488                 is_match = true;
489                 RRETURN;
490                 
491                 /* Assertion brackets. Check the alternative branches in turn - the
492                  matching won't pass the KET for an assertion. If any one branch matches,
493                  the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
494                  start of each branch to move the current point backwards, so the code at
495                  this level is identical to the lookahead case. */
496                 
497                 BEGIN_OPCODE(ASSERT):
498                 do {
499                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
500                     if (is_match)
501                         break;
502                     stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
503                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
504                 if (*stack.currentFrame->args.instructionPtr == OP_KET)
505                     RRETURN_NO_MATCH;
506                 
507                 /* Continue from after the assertion, updating the offsets high water
508                  mark, since extracts may have been taken during the assertion. */
509                 
510                 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
511                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
512                 stack.currentFrame->args.offset_top = md.end_offset_top;
513                 NEXT_OPCODE;
514                 
515                 /* Negative assertion: all branches must fail to match */
516                 
517                 BEGIN_OPCODE(ASSERT_NOT):
518                 do {
519                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
520                     if (is_match)
521                         RRETURN_NO_MATCH;
522                     stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
523                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
524                 
525                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
526                 NEXT_OPCODE;
527                 
528                 /* "Once" brackets are like assertion brackets except that after a match,
529                  the point in the subject string is not moved back. Thus there can never be
530                  a move back into the brackets. Friedl calls these "atomic" subpatterns.
531                  Check the alternative branches in turn - the matching won't pass the KET
532                  for this kind of subpattern. If any one branch matches, we carry on as at
533                  the end of a normal bracket, leaving the subject pointer. */
534                 
535                 BEGIN_OPCODE(ONCE):
536                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr;
537                 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
538                 
539                 do {
540                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(9, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
541                     if (is_match)
542                         break;
543                     stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
544                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
545                 
546                 /* If hit the end of the group (which could be repeated), fail */
547                 
548                 if (*stack.currentFrame->args.instructionPtr != OP_ONCE && *stack.currentFrame->args.instructionPtr != OP_ALT)
549                     RRETURN;
550                 
551                 /* Continue as from after the assertion, updating the offsets high water
552                  mark, since extracts may have been taken. */
553                 
554                 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
555                 
556                 stack.currentFrame->args.offset_top = md.end_offset_top;
557                 stack.currentFrame->args.subjectPtr = md.end_match_ptr;
558                 
559                 /* For a non-repeating ket, just continue at this level. This also
560                  happens for a repeating ket if no characters were matched in the group.
561                  This is the forcible breaking of infinite loops as implemented in Perl
562                  5.005. If there is an options reset, it will get obeyed in the normal
563                  course of events. */
564                 
565                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
566                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
567                     NEXT_OPCODE;
568                 }
569                 
570                 /* The repeating kets try the rest of the pattern or restart from the
571                  preceding bracket, in the appropriate order. We need to reset any options
572                  that changed within the bracket before re-running it, so check the next
573                  opcode. */
574                 
575                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
576                     RECURSIVE_MATCH(10, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
577                     if (is_match)
578                         RRETURN;
579                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(11, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
580                     if (is_match)
581                         RRETURN;
582                 } else { /* OP_KETRMAX */
583                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(12, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
584                     if (is_match)
585                         RRETURN;
586                     RECURSIVE_MATCH(13, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
587                     if (is_match)
588                         RRETURN;
589                 }
590                 RRETURN;
591                 
592                 /* An alternation is the end of a branch; scan along to find the end of the
593                  bracketed group and go to there. */
594                 
595                 BEGIN_OPCODE(ALT):
596                 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
597                 NEXT_OPCODE;
598                 
599                 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
600                  that it may occur zero times. It may repeat infinitely, or not at all -
601                  i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
602                  repeat limits are compiled as a number of copies, with the optional ones
603                  preceded by BRAZERO or BRAMINZERO. */
604                 
605                 BEGIN_OPCODE(BRAZERO):
606                 {
607                     stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
608                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.subpatternStart);
609                     if (is_match)
610                         RRETURN;
611                     moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->locals.startOfRepeatingBracket);
612                     stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
613                 }
614                 NEXT_OPCODE;
615                 
616                 BEGIN_OPCODE(BRAMINZERO):
617                 {
618                     stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
619                     moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->locals.startOfRepeatingBracket);
620                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
621                     if (is_match)
622                         RRETURN;
623                     stack.currentFrame->args.instructionPtr++;
624                 }
625                 NEXT_OPCODE;
626                 
627                 /* End of a group, repeated or non-repeating. If we are at the end of
628                  an assertion "group", stop matching and return MATCH_MATCH, but record the
629                  current high water mark for use by positive assertions. Do this also
630                  for the "once" (not-backup up) groups. */
631                 
632                 BEGIN_OPCODE(KET):
633                 BEGIN_OPCODE(KETRMIN):
634                 BEGIN_OPCODE(KETRMAX):
635                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
636                 stack.currentFrame->args.subpatternStart = stack.currentFrame->locals.subpatternStart;
637                 stack.currentFrame->locals.subpatternStart = stack.currentFrame->previousFrame->args.subpatternStart;
638
639                 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ONCE) {
640                     md.end_match_ptr = stack.currentFrame->args.subjectPtr;      /* For ONCE */
641                     md.end_offset_top = stack.currentFrame->args.offset_top;
642                     is_match = true;
643                     RRETURN;
644                 }
645                 
646                 /* In all other cases except a conditional group we have to check the
647                  group number back at the start and if necessary complete handling an
648                  extraction by setting the offsets and bumping the high water mark. */
649                 
650                 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
651                 
652                 /* For extended extraction brackets (large number), we have to fish out
653                  the number from a dummy opcode at the start. */
654                 
655                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
656                     stack.currentFrame->locals.number = get2ByteOpcodeValueAtOffset(stack.currentFrame->locals.instructionPtrAtStartOfOnce, 2+LINK_SIZE);
657                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
658                 
659 #ifdef DEBUG
660                 printf("end bracket %d", stack.currentFrame->locals.number);
661                 printf("\n");
662 #endif
663                 
664                 /* Test for a numbered group. This includes groups called as a result
665                  of recursion. Note that whole-pattern recursion is coded as a recurse
666                  into group 0, so it won't be picked up here. Instead, we catch it when
667                  the OP_END is reached. */
668                 
669                 if (stack.currentFrame->locals.number > 0) {
670                     if (stack.currentFrame->locals.offset >= md.offset_max)
671                         md.offset_overflow = true;
672                     else {
673                         md.offset_vector[stack.currentFrame->locals.offset] =
674                         md.offset_vector[md.offset_end - stack.currentFrame->locals.number];
675                         md.offset_vector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.start_subject;
676                         if (stack.currentFrame->args.offset_top <= stack.currentFrame->locals.offset)
677                             stack.currentFrame->args.offset_top = stack.currentFrame->locals.offset + 2;
678                     }
679                 }
680                 
681                 /* For a non-repeating ket, just continue at this level. This also
682                  happens for a repeating ket if no characters were matched in the group.
683                  This is the forcible breaking of infinite loops as implemented in Perl
684                  5.005. If there is an options reset, it will get obeyed in the normal
685                  course of events. */
686                 
687                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
688                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
689                     NEXT_OPCODE;
690                 }
691                 
692                 /* The repeating kets try the rest of the pattern or restart from the
693                  preceding bracket, in the appropriate order. */
694                 
695                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
696                     RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
697                     if (is_match)
698                         RRETURN;
699                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
700                     if (is_match)
701                         RRETURN;
702                 } else { /* OP_KETRMAX */
703                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
704                     if (is_match)
705                         RRETURN;
706                     RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
707                     if (is_match)
708                         RRETURN;
709                 }
710                 RRETURN;
711                 
712                 /* Start of subject, or after internal newline if multiline. */
713                 
714                 BEGIN_OPCODE(CIRC):
715                 if (stack.currentFrame->args.subjectPtr != md.start_subject && (!md.multiline || !isNewline(stack.currentFrame->args.subjectPtr[-1])))
716                     RRETURN_NO_MATCH;
717                 stack.currentFrame->args.instructionPtr++;
718                 NEXT_OPCODE;
719                 
720                 /* End of subject, or before internal newline if multiline. */
721                 
722                 BEGIN_OPCODE(DOLL):
723                 if (stack.currentFrame->args.subjectPtr < md.end_subject && (!md.multiline || !isNewline(*stack.currentFrame->args.subjectPtr)))
724                     RRETURN_NO_MATCH;
725                 stack.currentFrame->args.instructionPtr++;
726                 NEXT_OPCODE;
727                 
728                 /* Word boundary assertions */
729                 
730                 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
731                 BEGIN_OPCODE(WORD_BOUNDARY):
732                 /* Find out if the previous and current characters are "word" characters.
733                  In UTF-16 the previous character may end in a trailing surrogate, so back
734                  up over those first. Characters > 128 are assumed to be "non-word". */
735                 
736                 if (stack.currentFrame->args.subjectPtr == md.start_subject)
737                     prev_is_word = false;
738                 else {
739                     const UChar* lastptr = stack.currentFrame->args.subjectPtr - 1;
740                     while(isTrailingSurrogate(*lastptr))
741                         lastptr--;
742                     getChar(c, lastptr);
743                     prev_is_word = isWordChar(c);
744                 }
745                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
746                     cur_is_word = false;
747                 else {
748                     getChar(c, stack.currentFrame->args.subjectPtr);
749                     cur_is_word = isWordChar(c);
750                 }
751                 
752                 /* Now see if the situation is what we want */
753                 
754                 if ((*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY) ? cur_is_word == prev_is_word : cur_is_word != prev_is_word)
755                     RRETURN_NO_MATCH;
756                 NEXT_OPCODE;
757                 
758                 /* Match a single character type; inline for speed */
759                 
760                 BEGIN_OPCODE(ANY):
761                 if (stack.currentFrame->args.subjectPtr < md.end_subject && isNewline(*stack.currentFrame->args.subjectPtr))
762                     RRETURN_NO_MATCH;
763                 if (stack.currentFrame->args.subjectPtr++ >= md.end_subject)
764                     RRETURN_NO_MATCH;
765                 while (stack.currentFrame->args.subjectPtr < md.end_subject && isTrailingSurrogate(*stack.currentFrame->args.subjectPtr))
766                     stack.currentFrame->args.subjectPtr++;
767                 stack.currentFrame->args.instructionPtr++;
768                 NEXT_OPCODE;
769                 
770                 BEGIN_OPCODE(NOT_DIGIT):
771                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
772                     RRETURN_NO_MATCH;
773                 getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
774                 if (isASCIIDigit(c))
775                     RRETURN_NO_MATCH;
776                 stack.currentFrame->args.instructionPtr++;
777                 NEXT_OPCODE;
778                 
779                 BEGIN_OPCODE(DIGIT):
780                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
781                     RRETURN_NO_MATCH;
782                 getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
783                 if (!isASCIIDigit(c))
784                     RRETURN_NO_MATCH;
785                 stack.currentFrame->args.instructionPtr++;
786                 NEXT_OPCODE;
787                 
788                 BEGIN_OPCODE(NOT_WHITESPACE):
789                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
790                     RRETURN_NO_MATCH;
791                 getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
792                 if (isSpaceChar(c))
793                     RRETURN_NO_MATCH;
794                 stack.currentFrame->args.instructionPtr++;
795                 NEXT_OPCODE;
796                 
797                 BEGIN_OPCODE(WHITESPACE):
798                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
799                     RRETURN_NO_MATCH;
800                 getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
801                 if (!isSpaceChar(c))
802                     RRETURN_NO_MATCH;
803                 stack.currentFrame->args.instructionPtr++;
804                 NEXT_OPCODE;
805                 
806                 BEGIN_OPCODE(NOT_WORDCHAR):
807                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
808                     RRETURN_NO_MATCH;
809                 getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
810                 if (isWordChar(c))
811                     RRETURN_NO_MATCH;
812                 stack.currentFrame->args.instructionPtr++;
813                 NEXT_OPCODE;
814                 
815                 BEGIN_OPCODE(WORDCHAR):
816                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
817                     RRETURN_NO_MATCH;
818                 getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
819                 if (!isWordChar(c))
820                     RRETURN_NO_MATCH;
821                 stack.currentFrame->args.instructionPtr++;
822                 NEXT_OPCODE;
823                 
824                 /* Match a back reference, possibly repeatedly. Look past the end of the
825                  item to see if there is repeat information following. The code is similar
826                  to that for character classes, but repeated for efficiency. Then obey
827                  similar code to character type repeats - written out again for speed.
828                  However, if the referenced string is the empty string, always treat
829                  it as matched, any number of times (otherwise there could be infinite
830                  loops). */
831                 
832                 BEGIN_OPCODE(REF):
833                 stack.currentFrame->locals.offset = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1) << 1;               /* Doubled ref number */
834                 stack.currentFrame->args.instructionPtr += 3;                                 /* Advance past item */
835                 
836                 /* If the reference is unset (at or beyond offset_top, or its start offset
837                  is negative), set the length to zero so that it is treated as an empty
838                  string. We can't just fail here, because of the possibility of
839                  quantifiers with zero minima. */
840                 
841                 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offset_top || md.offset_vector[stack.currentFrame->locals.offset] < 0)
842                     stack.currentFrame->locals.length = 0;
843                 else
844                     stack.currentFrame->locals.length = md.offset_vector[stack.currentFrame->locals.offset+1] - md.offset_vector[stack.currentFrame->locals.offset];
845                 
846                 /* Set up for repetition, or handle the non-repeated case */
847                 
848                 switch (*stack.currentFrame->args.instructionPtr) {
849                 case OP_CRSTAR:
850                 case OP_CRMINSTAR:
851                 case OP_CRPLUS:
852                 case OP_CRMINPLUS:
853                 case OP_CRQUERY:
854                 case OP_CRMINQUERY:
855                     c = *stack.currentFrame->args.instructionPtr++ - OP_CRSTAR;
856                     minimize = (c & 1);
857                     min = rep_min[c];                 /* Pick up values from tables; */
858                     stack.currentFrame->locals.max = rep_max[c];                 /* zero for max => infinity */
859                     if (stack.currentFrame->locals.max == 0)
860                         stack.currentFrame->locals.max = INT_MAX;
861                     break;
862                     
863                 case OP_CRRANGE:
864                 case OP_CRMINRANGE:
865                     minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
866                     min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
867                     stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
868                     if (stack.currentFrame->locals.max == 0)
869                         stack.currentFrame->locals.max = INT_MAX;
870                     stack.currentFrame->args.instructionPtr += 5;
871                     break;
872                 
873                 default:               /* No repeat follows */
874                     if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
875                         RRETURN_NO_MATCH;
876                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
877                     NEXT_OPCODE;
878                 }
879                 
880                 /* If the length of the reference is zero, just continue with the
881                  main loop. */
882                 
883                 if (stack.currentFrame->locals.length == 0)
884                     NEXT_OPCODE;
885                 
886                 /* First, ensure the minimum number of matches are present. */
887                 
888                 for (i = 1; i <= min; i++) {
889                     if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
890                         RRETURN_NO_MATCH;
891                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
892                 }
893                 
894                 /* If min = max, continue at the same level without recursion.
895                  They are not both allowed to be zero. */
896                 
897                 if (min == stack.currentFrame->locals.max)
898                     NEXT_OPCODE;
899                 
900                 /* If minimizing, keep trying and advancing the pointer */
901                 
902                 if (minimize) {
903                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
904                         RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
905                         if (is_match)
906                             RRETURN;
907                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
908                             RRETURN;
909                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
910                     }
911                     /* Control never reaches here */
912                 }
913                 
914                 /* If maximizing, find the longest string and work backwards */
915                 
916                 else {
917                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
918                     for (i = min; i < stack.currentFrame->locals.max; i++) {
919                         if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
920                             break;
921                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
922                     }
923                     while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
924                         RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
925                         if (is_match)
926                             RRETURN;
927                         stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
928                     }
929                     RRETURN_NO_MATCH;
930                 }
931                 ASSERT_NOT_REACHED();
932                 
933                 /* Match a bit-mapped character class, possibly repeatedly. This op code is
934                  used when all the characters in the class have values in the range 0-255,
935                  and either the matching is caseful, or the characters are in the range
936                  0-127 when UTF-8 processing is enabled. The only difference between
937                  OP_CLASS and OP_NCLASS occurs when a data character outside the range is
938                  encountered.
939                  
940                  First, look past the end of the item to see if there is repeat information
941                  following. Then obey similar code to character type repeats - written out
942                  again for speed. */
943                 
944                 BEGIN_OPCODE(NCLASS):
945                 BEGIN_OPCODE(CLASS):
946                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1;                /* Save for matching */
947                 stack.currentFrame->args.instructionPtr += 33;                     /* Advance past the item */
948                 
949                 switch (*stack.currentFrame->args.instructionPtr) {
950                 case OP_CRSTAR:
951                 case OP_CRMINSTAR:
952                 case OP_CRPLUS:
953                 case OP_CRMINPLUS:
954                 case OP_CRQUERY:
955                 case OP_CRMINQUERY:
956                     c = *stack.currentFrame->args.instructionPtr++ - OP_CRSTAR;
957                     minimize = (c & 1);
958                     min = rep_min[c];                 /* Pick up values from tables; */
959                     stack.currentFrame->locals.max = rep_max[c];                 /* zero for max => infinity */
960                     if (stack.currentFrame->locals.max == 0)
961                         stack.currentFrame->locals.max = INT_MAX;
962                     break;
963                     
964                 case OP_CRRANGE:
965                 case OP_CRMINRANGE:
966                     minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
967                     min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
968                     stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
969                     if (stack.currentFrame->locals.max == 0)
970                         stack.currentFrame->locals.max = INT_MAX;
971                     stack.currentFrame->args.instructionPtr += 5;
972                     break;
973                     
974                 default:               /* No repeat follows */
975                     min = stack.currentFrame->locals.max = 1;
976                     break;
977                 }
978                 
979                 /* First, ensure the minimum number of matches are present. */
980                 
981                 for (i = 1; i <= min; i++) {
982                     if (stack.currentFrame->args.subjectPtr >= md.end_subject)
983                         RRETURN_NO_MATCH;
984                     getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
985                     if (c > 255) {
986                         if (stack.currentFrame->locals.data[-1] == OP_CLASS)
987                             RRETURN_NO_MATCH;
988                     } else {
989                         if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
990                             RRETURN_NO_MATCH;
991                     }
992                 }
993                 
994                 /* If max == min we can continue with the main loop without the
995                  need to recurse. */
996                 
997                 if (min == stack.currentFrame->locals.max)
998                     NEXT_OPCODE;      
999                 
1000                 /* If minimizing, keep testing the rest of the expression and advancing
1001                  the pointer while it matches the class. */
1002                 if (minimize) {
1003                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1004                         RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1005                         if (is_match)
1006                             RRETURN;
1007                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1008                             RRETURN;
1009                         getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
1010                         if (c > 255) {
1011                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1012                                 RRETURN;
1013                         } else {
1014                             if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
1015                                 RRETURN;
1016                         }
1017                     }
1018                     /* Control never reaches here */
1019                 }
1020                 /* If maximizing, find the longest possible run, then work backwards. */
1021                 else {
1022                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1023                     
1024                     for (i = min; i < stack.currentFrame->locals.max; i++) {
1025                         if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1026                             break;
1027                         int length;
1028                         getCharAndLength(c, stack.currentFrame->args.subjectPtr, length);
1029                         if (c > 255) {
1030                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1031                                 break;
1032                         } else {
1033                             if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
1034                                 break;
1035                         }
1036                         stack.currentFrame->args.subjectPtr += length;
1037                     }
1038                     for (;;) {
1039                         RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1040                         if (is_match)
1041                             RRETURN;
1042                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1043                             break;        /* Stop if tried at original pos */
1044                         BACKCHAR(stack.currentFrame->args.subjectPtr);
1045                     }
1046                     
1047                     RRETURN;
1048                 }
1049                 /* Control never reaches here */
1050                 
1051                 /* Match an extended character class. This opcode is encountered only
1052                  in UTF-8 mode, because that's the only time it is compiled. */
1053                 
1054                 BEGIN_OPCODE(XCLASS):
1055                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE;                /* Save for matching */
1056                 stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);                      /* Advance past the item */
1057                 
1058                 switch (*stack.currentFrame->args.instructionPtr) {
1059                 case OP_CRSTAR:
1060                 case OP_CRMINSTAR:
1061                 case OP_CRPLUS:
1062                 case OP_CRMINPLUS:
1063                 case OP_CRQUERY:
1064                 case OP_CRMINQUERY:
1065                     c = *stack.currentFrame->args.instructionPtr++ - OP_CRSTAR;
1066                     minimize = (c & 1);
1067                     min = rep_min[c];                 /* Pick up values from tables; */
1068                     stack.currentFrame->locals.max = rep_max[c];                 /* zero for max => infinity */
1069                     if (stack.currentFrame->locals.max == 0)
1070                         stack.currentFrame->locals.max = INT_MAX;
1071                     break;
1072                     
1073                 case OP_CRRANGE:
1074                 case OP_CRMINRANGE:
1075                     minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
1076                     min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1077                     stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
1078                     if (stack.currentFrame->locals.max == 0)
1079                         stack.currentFrame->locals.max = INT_MAX;
1080                     stack.currentFrame->args.instructionPtr += 5;
1081                     break;
1082                     
1083                 default:               /* No repeat follows */
1084                     min = stack.currentFrame->locals.max = 1;
1085             }
1086                 
1087                 /* First, ensure the minimum number of matches are present. */
1088                 
1089                 for (i = 1; i <= min; i++) {
1090                     if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1091                         RRETURN_NO_MATCH;
1092                     getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
1093                     if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1094                         RRETURN_NO_MATCH;
1095                 }
1096                 
1097                 /* If max == min we can continue with the main loop without the
1098                  need to recurse. */
1099                 
1100                 if (min == stack.currentFrame->locals.max)
1101                     NEXT_OPCODE;
1102                 
1103                 /* If minimizing, keep testing the rest of the expression and advancing
1104                  the pointer while it matches the class. */
1105                 
1106                 if (minimize) {
1107                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1108                         RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1109                         if (is_match)
1110                             RRETURN;
1111                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1112                             RRETURN;
1113                         getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
1114                         if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1115                             RRETURN;
1116                     }
1117                     /* Control never reaches here */
1118                 }
1119                 
1120                 /* If maximizing, find the longest possible run, then work backwards. */
1121                 
1122                 else {
1123                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1124                     for (i = min; i < stack.currentFrame->locals.max; i++) {
1125                         if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1126                             break;
1127                         int length;
1128                         getCharAndLength(c, stack.currentFrame->args.subjectPtr, length);
1129                         if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1130                             break;
1131                         stack.currentFrame->args.subjectPtr += length;
1132                     }
1133                     for(;;) {
1134                         RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1135                         if (is_match)
1136                             RRETURN;
1137                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1138                             break;        /* Stop if tried at original pos */
1139                         BACKCHAR(stack.currentFrame->args.subjectPtr)
1140                     }
1141                     RRETURN;
1142                 }
1143                 
1144                 /* Control never reaches here */
1145                 
1146                 /* Match a single character, casefully */
1147                 
1148                 BEGIN_OPCODE(CHAR):
1149                 stack.currentFrame->locals.length = 1;
1150                 stack.currentFrame->args.instructionPtr++;
1151                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1152             {
1153                 int dc;
1154                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1155                 switch (md.end_subject - stack.currentFrame->args.subjectPtr) {
1156                 case 0:
1157                     RRETURN_NO_MATCH;
1158                 case 1:
1159                     dc = *stack.currentFrame->args.subjectPtr++;
1160                     if (isLeadingSurrogate(dc))
1161                         RRETURN_NO_MATCH;
1162                     break;
1163                     default:
1164                     getCharAndAdvance(dc, stack.currentFrame->args.subjectPtr);
1165                 }
1166                 if (stack.currentFrame->locals.fc != dc)
1167                     RRETURN_NO_MATCH;
1168             }
1169                 NEXT_OPCODE;
1170                 
1171                 /* Match a single character, caselessly */
1172                 
1173                 BEGIN_OPCODE(CHARNC):
1174                 stack.currentFrame->locals.length = 1;
1175                 stack.currentFrame->args.instructionPtr++;
1176                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1177                 
1178                 if (md.end_subject - stack.currentFrame->args.subjectPtr == 0)
1179                     RRETURN_NO_MATCH;
1180                 
1181             {
1182                 int dc;
1183                 if (md.end_subject - stack.currentFrame->args.subjectPtr == 1) {
1184                     dc = *stack.currentFrame->args.subjectPtr++;
1185                     if (isLeadingSurrogate(dc))
1186                         RRETURN_NO_MATCH;
1187                 } else
1188                     getCharAndAdvance(dc, stack.currentFrame->args.subjectPtr);
1189                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1190                 
1191                 /* If we have Unicode property support, we can use it to test the other
1192                  case of the character, if there is one. */
1193                 
1194                 if (stack.currentFrame->locals.fc != dc) {
1195                     if (dc != _pcre_ucp_othercase(stack.currentFrame->locals.fc))
1196                         RRETURN_NO_MATCH;
1197                 }
1198             }
1199                 NEXT_OPCODE;
1200                 
1201                 /* Match a single ASCII character. */
1202                 
1203                 BEGIN_OPCODE(ASCII_CHAR):
1204                 if (md.end_subject == stack.currentFrame->args.subjectPtr)
1205                     RRETURN_NO_MATCH;
1206                 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
1207                     RRETURN_NO_MATCH;
1208                 ++stack.currentFrame->args.subjectPtr;
1209                 stack.currentFrame->args.instructionPtr += 2;
1210                 NEXT_OPCODE;
1211                 
1212                 /* Match one of two cases of an ASCII character. */
1213                 
1214                 BEGIN_OPCODE(ASCII_LETTER_NC):
1215                 if (md.end_subject == stack.currentFrame->args.subjectPtr)
1216                     RRETURN_NO_MATCH;
1217                 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
1218                     RRETURN_NO_MATCH;
1219                 ++stack.currentFrame->args.subjectPtr;
1220                 stack.currentFrame->args.instructionPtr += 2;
1221                 NEXT_OPCODE;
1222                 
1223                 /* Match a single character repeatedly; different opcodes share code. */
1224                 
1225                 BEGIN_OPCODE(EXACT):
1226                 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1227                 minimize = false;
1228                 stack.currentFrame->args.instructionPtr += 3;
1229                 goto REPEATCHAR;
1230                 
1231                 BEGIN_OPCODE(UPTO):
1232                 BEGIN_OPCODE(MINUPTO):
1233                 min = 0;
1234                 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1235                 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
1236                 stack.currentFrame->args.instructionPtr += 3;
1237                 goto REPEATCHAR;
1238                 
1239                 BEGIN_OPCODE(STAR):
1240                 BEGIN_OPCODE(MINSTAR):
1241                 BEGIN_OPCODE(PLUS):
1242                 BEGIN_OPCODE(MINPLUS):
1243                 BEGIN_OPCODE(QUERY):
1244                 BEGIN_OPCODE(MINQUERY):
1245                 c = *stack.currentFrame->args.instructionPtr++ - OP_STAR;
1246                 minimize = (c & 1);
1247                 min = rep_min[c];                 /* Pick up values from tables; */
1248                 stack.currentFrame->locals.max = rep_max[c];                 /* zero for max => infinity */
1249                 if (stack.currentFrame->locals.max == 0)
1250                     stack.currentFrame->locals.max = INT_MAX;
1251                 
1252                 /* Common code for all repeated single-character matches. We can give
1253                  up quickly if there are fewer than the minimum number of characters left in
1254                  the subject. */
1255                 
1256             REPEATCHAR:
1257                 
1258                 stack.currentFrame->locals.length = 1;
1259                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1260                 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.end_subject - stack.currentFrame->args.subjectPtr)
1261                     RRETURN_NO_MATCH;
1262                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1263                 
1264                 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1265                     int othercase = md.ignoreCase ? _pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
1266                     
1267                     for (i = 1; i <= min; i++) {
1268                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1269                             RRETURN_NO_MATCH;
1270                         ++stack.currentFrame->args.subjectPtr;
1271                     }
1272                     
1273                     if (min == stack.currentFrame->locals.max)
1274                         NEXT_OPCODE;
1275                     
1276                     if (minimize) {
1277                         stack.currentFrame->locals.repeat_othercase = othercase;
1278                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1279                             RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1280                             if (is_match)
1281                                 RRETURN;
1282                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1283                                 RRETURN;
1284                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeat_othercase)
1285                                 RRETURN;
1286                             ++stack.currentFrame->args.subjectPtr;
1287                         }
1288                         /* Control never reaches here */
1289                     } else {
1290                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1291                         for (i = min; i < stack.currentFrame->locals.max; i++) {
1292                             if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1293                                 break;
1294                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1295                                 break;
1296                             ++stack.currentFrame->args.subjectPtr;
1297                         }
1298                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1299                             RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1300                             if (is_match)
1301                                 RRETURN;
1302                             --stack.currentFrame->args.subjectPtr;
1303                         }
1304                         RRETURN_NO_MATCH;
1305                     }
1306                     /* Control never reaches here */
1307                 } else {
1308                     /* No case on surrogate pairs, so no need to bother with "othercase". */
1309                     
1310                     for (i = 1; i <= min; i++) {
1311                         int nc;
1312                         getChar(nc, stack.currentFrame->args.subjectPtr);
1313                         if (nc != stack.currentFrame->locals.fc)
1314                             RRETURN_NO_MATCH;
1315                         stack.currentFrame->args.subjectPtr += 2;
1316                     }
1317                     
1318                     if (min == stack.currentFrame->locals.max)
1319                         NEXT_OPCODE;
1320                     
1321                     if (minimize) {
1322                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1323                             int nc;
1324                             RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1325                             if (is_match)
1326                                 RRETURN;
1327                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1328                                 RRETURN;
1329                             getChar(nc, stack.currentFrame->args.subjectPtr);
1330                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1331                                 RRETURN;
1332                             stack.currentFrame->args.subjectPtr += 2;
1333                         }
1334                         /* Control never reaches here */
1335                     } else {
1336                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1337                         for (i = min; i < stack.currentFrame->locals.max; i++) {
1338                             int nc;
1339                             if (stack.currentFrame->args.subjectPtr > md.end_subject - 2)
1340                                 break;
1341                             getChar(nc, stack.currentFrame->args.subjectPtr);
1342                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1343                                 break;
1344                             stack.currentFrame->args.subjectPtr += 2;
1345                         }
1346                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1347                             RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1348                             if (is_match)
1349                                 RRETURN;
1350                             stack.currentFrame->args.subjectPtr -= 2;
1351                         }
1352                         RRETURN_NO_MATCH;
1353                     }
1354                     /* Control never reaches here */
1355                 }
1356                 /* Control never reaches here */
1357                 
1358                 /* Match a negated single one-byte character. The character we are
1359                  checking can be multibyte. */
1360                 
1361                 BEGIN_OPCODE(NOT):
1362                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1363                     RRETURN_NO_MATCH;
1364                 stack.currentFrame->args.instructionPtr++;
1365                 getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
1366                 if (md.ignoreCase) {
1367                     if (c < 128)
1368                         c = toLowerCase(c);
1369                     if (toLowerCase(*stack.currentFrame->args.instructionPtr++) == c)
1370                         RRETURN_NO_MATCH;
1371                 } else {
1372                     if (*stack.currentFrame->args.instructionPtr++ == c)
1373                         RRETURN_NO_MATCH;
1374                 }
1375                 NEXT_OPCODE;
1376                 
1377                 /* Match a negated single one-byte character repeatedly. This is almost a
1378                  repeat of the code for a repeated single character, but I haven't found a
1379                  nice way of commoning these up that doesn't require a test of the
1380                  positive/negative option for each character match. Maybe that wouldn't add
1381                  very much to the time taken, but character matching *is* what this is all
1382                  about... */
1383                 
1384                 BEGIN_OPCODE(NOTEXACT):
1385                 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1386                 minimize = false;
1387                 stack.currentFrame->args.instructionPtr += 3;
1388                 goto REPEATNOTCHAR;
1389                 
1390                 BEGIN_OPCODE(NOTUPTO):
1391                 BEGIN_OPCODE(NOTMINUPTO):
1392                 min = 0;
1393                 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1394                 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
1395                 stack.currentFrame->args.instructionPtr += 3;
1396                 goto REPEATNOTCHAR;
1397                 
1398                 BEGIN_OPCODE(NOTSTAR):
1399                 BEGIN_OPCODE(NOTMINSTAR):
1400                 BEGIN_OPCODE(NOTPLUS):
1401                 BEGIN_OPCODE(NOTMINPLUS):
1402                 BEGIN_OPCODE(NOTQUERY):
1403                 BEGIN_OPCODE(NOTMINQUERY):
1404                 c = *stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR;
1405                 minimize = (c & 1);
1406                 min = rep_min[c];                 /* Pick up values from tables; */
1407                 stack.currentFrame->locals.max = rep_max[c];                 /* zero for max => infinity */
1408                 if (stack.currentFrame->locals.max == 0) stack.currentFrame->locals.max = INT_MAX;
1409                 
1410                 /* Common code for all repeated single-byte matches. We can give up quickly
1411                  if there are fewer than the minimum number of bytes left in the
1412                  subject. */
1413                 
1414             REPEATNOTCHAR:
1415                 if (min > md.end_subject - stack.currentFrame->args.subjectPtr)
1416                     RRETURN_NO_MATCH;
1417                 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
1418                 
1419                 /* The code is duplicated for the caseless and caseful cases, for speed,
1420                  since matching characters is likely to be quite common. First, ensure the
1421                  minimum number of matches are present. If min = max, continue at the same
1422                  level without recursing. Otherwise, if minimizing, keep trying the rest of
1423                  the expression and advancing one matching character if failing, up to the
1424                  maximum. Alternatively, if maximizing, find the maximum number of
1425                  characters and work backwards. */
1426                 
1427                 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
1428                 
1429                 if (md.ignoreCase) {
1430                     if (stack.currentFrame->locals.fc < 128)
1431                         stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
1432                     
1433                     {
1434                         int d;
1435                         for (i = 1; i <= min; i++) {
1436                             getCharAndAdvance(d, stack.currentFrame->args.subjectPtr);
1437                             if (d < 128)
1438                                 d = toLowerCase(d);
1439                             if (stack.currentFrame->locals.fc == d)
1440                                 RRETURN_NO_MATCH;
1441                         }
1442                     }
1443                     
1444                     if (min == stack.currentFrame->locals.max)
1445                         NEXT_OPCODE;      
1446                     
1447                     if (minimize) {
1448                         int d;
1449                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1450                             RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1451                             if (is_match)
1452                                 RRETURN;
1453                             getCharAndAdvance(d, stack.currentFrame->args.subjectPtr);
1454                             if (d < 128)
1455                                 d = toLowerCase(d);
1456                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject || stack.currentFrame->locals.fc == d)
1457                                 RRETURN;
1458                         }
1459                         /* Control never reaches here */
1460                     }
1461                     
1462                     /* Maximize case */
1463                     
1464                     else {
1465                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1466                         
1467                         {
1468                             int d;
1469                             for (i = min; i < stack.currentFrame->locals.max; i++) {
1470                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1471                                     break;
1472                                 int length;
1473                                 getCharAndLength(d, stack.currentFrame->args.subjectPtr, length);
1474                                 if (d < 128)
1475                                     d = toLowerCase(d);
1476                                 if (stack.currentFrame->locals.fc == d)
1477                                     break;
1478                                 stack.currentFrame->args.subjectPtr += length;
1479                             }
1480                             for (;;) {
1481                                 RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1482                                 if (is_match)
1483                                     RRETURN;
1484                                 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1485                                     break;        /* Stop if tried at original pos */
1486                                 BACKCHAR(stack.currentFrame->args.subjectPtr);
1487                             }
1488                         }
1489                         
1490                         RRETURN;
1491                     }
1492                     /* Control never reaches here */
1493                 }
1494                 
1495                 /* Caseful comparisons */
1496                 
1497                 else {
1498                     {
1499                         int d;
1500                         for (i = 1; i <= min; i++) {
1501                             getCharAndAdvance(d, stack.currentFrame->args.subjectPtr);
1502                             if (stack.currentFrame->locals.fc == d)
1503                                 RRETURN_NO_MATCH;
1504                         }
1505                     }
1506                     
1507                     if (min == stack.currentFrame->locals.max)
1508                         NEXT_OPCODE;
1509                     
1510                     if (minimize) {
1511                         int d;
1512                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1513                             RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1514                             if (is_match)
1515                                 RRETURN;
1516                             getCharAndAdvance(d, stack.currentFrame->args.subjectPtr);
1517                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject || stack.currentFrame->locals.fc == d)
1518                                 RRETURN;
1519                         }
1520                         /* Control never reaches here */
1521                     }
1522                     
1523                     /* Maximize case */
1524                     
1525                     else {
1526                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1527                         
1528                         {
1529                             int d;
1530                             for (i = min; i < stack.currentFrame->locals.max; i++) {
1531                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1532                                     break;
1533                                 int length;
1534                                 getCharAndLength(d, stack.currentFrame->args.subjectPtr, length);
1535                                 if (stack.currentFrame->locals.fc == d)
1536                                     break;
1537                                 stack.currentFrame->args.subjectPtr += length;
1538                             }
1539                             for (;;) {
1540                                 RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1541                                 if (is_match)
1542                                     RRETURN;
1543                                 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1544                                     break;        /* Stop if tried at original pos */
1545                                 BACKCHAR(stack.currentFrame->args.subjectPtr);
1546                             }
1547                         }
1548                         
1549                         RRETURN;
1550                     }
1551                 }
1552                 /* Control never reaches here */
1553                 
1554                 /* Match a single character type repeatedly; several different opcodes
1555                  share code. This is very similar to the code for single characters, but we
1556                  repeat it in the interests of efficiency. */
1557                 
1558                 BEGIN_OPCODE(TYPEEXACT):
1559                 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1560                 minimize = true;
1561                 stack.currentFrame->args.instructionPtr += 3;
1562                 goto REPEATTYPE;
1563                 
1564                 BEGIN_OPCODE(TYPEUPTO):
1565                 BEGIN_OPCODE(TYPEMINUPTO):
1566                 min = 0;
1567                 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1568                 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
1569                 stack.currentFrame->args.instructionPtr += 3;
1570                 goto REPEATTYPE;
1571                 
1572                 BEGIN_OPCODE(TYPESTAR):
1573                 BEGIN_OPCODE(TYPEMINSTAR):
1574                 BEGIN_OPCODE(TYPEPLUS):
1575                 BEGIN_OPCODE(TYPEMINPLUS):
1576                 BEGIN_OPCODE(TYPEQUERY):
1577                 BEGIN_OPCODE(TYPEMINQUERY):
1578                 c = *stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR;
1579                 minimize = (c & 1);
1580                 min = rep_min[c];                 /* Pick up values from tables; */
1581                 stack.currentFrame->locals.max = rep_max[c];                 /* zero for max => infinity */
1582                 if (stack.currentFrame->locals.max == 0)
1583                     stack.currentFrame->locals.max = INT_MAX;
1584                 
1585                 /* Common code for all repeated single character type matches. Note that
1586                  '.' and the negated types may consume a whole surrogate pair (two UTF-16
1587                  code units), whereas the digit, whitespace and word types always consume exactly one code unit. */
1588                 
1589             REPEATTYPE:
1590                 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++;      /* Code for the character type */
1591                 
1592                 /* First, ensure the minimum number of matches are present. Use inline
1593                  code for maximizing the speed, and do the type test once at the start
1594                  (i.e. keep it out of the loop). Also we can test that there are at least
1595                  the minimum number of bytes before we start. This isn't as effective in
1596                  UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
1597                  is tidier. Also separate the UCP code, which can be the same for both UTF-8
1598                  and single-bytes. */
1599                 
1600                 if (min > md.end_subject - stack.currentFrame->args.subjectPtr)
1601                     RRETURN_NO_MATCH;
1602                 if (min > 0) {
1603                     switch(stack.currentFrame->locals.ctype) {
1604                         case OP_ANY:
1605                             for (i = 1; i <= min; i++) {
1606                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || isNewline(*stack.currentFrame->args.subjectPtr))
1607                                     RRETURN_NO_MATCH;
1608                                 ++stack.currentFrame->args.subjectPtr;
1609                                 while (stack.currentFrame->args.subjectPtr < md.end_subject && isTrailingSurrogate(*stack.currentFrame->args.subjectPtr))
1610                                     stack.currentFrame->args.subjectPtr++;
1611                             }
1612                             break;
1613                             
1614                             case OP_NOT_DIGIT:
1615                             for (i = 1; i <= min; i++) {
1616                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1617                                     RRETURN_NO_MATCH;
1618                                 getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
1619                                 if (isASCIIDigit(c))
1620                                     RRETURN_NO_MATCH;
1621                             }
1622                             break;
1623                             
1624                             case OP_DIGIT:
1625                             for (i = 1; i <= min; i++) {
1626                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || !isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
1627                                     RRETURN_NO_MATCH;
1628                                 /* No trailing surrogate to skip - the matched character is a single code unit */
1629                             }
1630                             break;
1631                             
1632                             case OP_NOT_WHITESPACE:
1633                             for (i = 1; i <= min; i++) {
1634                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || isSpaceChar(*stack.currentFrame->args.subjectPtr))
1635                                     RRETURN_NO_MATCH;
1636                                 while (++stack.currentFrame->args.subjectPtr < md.end_subject && isTrailingSurrogate(*stack.currentFrame->args.subjectPtr)) { }
1637                             }
1638                             break;
1639                             
1640                             case OP_WHITESPACE:
1641                             for (i = 1; i <= min; i++) {
1642                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || !isSpaceChar(*stack.currentFrame->args.subjectPtr++))
1643                                     RRETURN_NO_MATCH;
1644                                 /* No trailing surrogate to skip - the matched character is a single code unit */
1645                             }
1646                             break;
1647                             
1648                             case OP_NOT_WORDCHAR:
1649                             for (i = 1; i <= min; i++) {
1650                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || isWordChar(*stack.currentFrame->args.subjectPtr))
1651                                     RRETURN_NO_MATCH;
1652                                 while (++stack.currentFrame->args.subjectPtr < md.end_subject && isTrailingSurrogate(*stack.currentFrame->args.subjectPtr)) { }
1653                             }
1654                             break;
1655                             
1656                             case OP_WORDCHAR:
1657                             for (i = 1; i <= min; i++) {
1658                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || !isWordChar(*stack.currentFrame->args.subjectPtr++))
1659                                     RRETURN_NO_MATCH;
1660                                 /* No trailing surrogate to skip - the matched character is a single code unit */
1661                             }
1662                             break;
1663                             
1664                             default:
1665                             ASSERT_NOT_REACHED();
1666                             return matchError(JSRegExpErrorInternal, stack);
1667                     }  /* End switch(stack.currentFrame->locals.ctype) */
1668                 }
1669                 
1670                 /* If min = max, continue at the same level without recursing */
1671                 
1672                 if (min == stack.currentFrame->locals.max)
1673                     NEXT_OPCODE;    
1674                 
1675                 /* If minimizing, we have to test the rest of the pattern before each
1676                  subsequent match. */
1677                 
1678                 if (minimize) {
1679                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1680                         RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1681                         if (is_match)
1682                             RRETURN;
1683                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1684                             RRETURN;
1685                         
1686                         getCharAndAdvance(c, stack.currentFrame->args.subjectPtr);
1687                         switch(stack.currentFrame->locals.ctype) {
1688                         case OP_ANY:
1689                             if (isNewline(c))
1690                                 RRETURN;
1691                             break;
1692                             
1693                         case OP_NOT_DIGIT:
1694                             if (isASCIIDigit(c))
1695                                 RRETURN;
1696                             break;
1697                             
1698                         case OP_DIGIT:
1699                             if (!isASCIIDigit(c))
1700                                 RRETURN;
1701                             break;
1702                             
1703                         case OP_NOT_WHITESPACE:
1704                             if (isSpaceChar(c))
1705                                 RRETURN;
1706                             break;
1707                             
1708                         case OP_WHITESPACE:
1709                             if  (!isSpaceChar(c))
1710                                 RRETURN;
1711                             break;
1712                             
1713                         case OP_NOT_WORDCHAR:
1714                             if (isWordChar(c))
1715                                 RRETURN;
1716                             break;
1717                             
1718                         case OP_WORDCHAR:
1719                             if (!isWordChar(c))
1720                                 RRETURN;
1721                             break;
1722                             
1723                         default:
1724                             ASSERT_NOT_REACHED();
1725                             return matchError(JSRegExpErrorInternal, stack);
1726                         }
1727                     }
1728                     /* Control never reaches here */
1729                 }
1730                 
1731                 /* If maximizing it is worth using inline code for speed, doing the type
1732                  test once at the start (i.e. keep it out of the loop). */
1733                 
1734                 else {
1735                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;  /* Remember where we started */
1736                     
1737                     switch(stack.currentFrame->locals.ctype) {
1738                         case OP_ANY:
1739                             
1740                             /* Special code is required for UTF8, but when the maximum is unlimited
1741                              we don't need it, so we repeat the non-UTF8 code. This is probably
1742                              worth it, because .* is quite a common idiom. */
1743                             
1744                             if (stack.currentFrame->locals.max < INT_MAX) {
1745                                 for (i = min; i < stack.currentFrame->locals.max; i++) {
1746                                     if (stack.currentFrame->args.subjectPtr >= md.end_subject || isNewline(*stack.currentFrame->args.subjectPtr))
1747                                         break;
1748                                     stack.currentFrame->args.subjectPtr++;
1749                                     while (stack.currentFrame->args.subjectPtr < md.end_subject && (*stack.currentFrame->args.subjectPtr & 0xc0) == 0x80)
1750                                         stack.currentFrame->args.subjectPtr++;
1751                                 }
1752                             }
1753                             
1754                             /* Handle unlimited UTF-8 repeat */
1755                             
1756                             else {
1757                                 for (i = min; i < stack.currentFrame->locals.max; i++) {
1758                                     if (stack.currentFrame->args.subjectPtr >= md.end_subject || isNewline(*stack.currentFrame->args.subjectPtr))
1759                                         break;
1760                                     stack.currentFrame->args.subjectPtr++;
1761                                 }
1762                                 break;
1763                             }
1764                             break;
1765                             
1766                             case OP_NOT_DIGIT:
1767                             for (i = min; i < stack.currentFrame->locals.max; i++) {
1768                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1769                                     break;
1770                                 int length;
1771                                 getCharAndLength(c, stack.currentFrame->args.subjectPtr, length);
1772                                 if (isASCIIDigit(c))
1773                                     break;
1774                                 stack.currentFrame->args.subjectPtr += length;
1775                             }
1776                             break;
1777                             
1778                             case OP_DIGIT:
1779                             for (i = min; i < stack.currentFrame->locals.max; i++) {
1780                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1781                                     break;
1782                                 int length;
1783                                 getCharAndLength(c, stack.currentFrame->args.subjectPtr, length);
1784                                 if (!isASCIIDigit(c))
1785                                     break;
1786                                 stack.currentFrame->args.subjectPtr += length;
1787                             }
1788                             break;
1789                             
1790                             case OP_NOT_WHITESPACE:
1791                             for (i = min; i < stack.currentFrame->locals.max; i++) {
1792                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1793                                     break;
1794                                 int length;
1795                                 getCharAndLength(c, stack.currentFrame->args.subjectPtr, length);
1796                                 if (isSpaceChar(c))
1797                                     break;
1798                                 stack.currentFrame->args.subjectPtr += length;
1799                             }
1800                             break;
1801                             
1802                             case OP_WHITESPACE:
1803                             for (i = min; i < stack.currentFrame->locals.max; i++) {
1804                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1805                                     break;
1806                                 int length;
1807                                 getCharAndLength(c, stack.currentFrame->args.subjectPtr, length);
1808                                 if (!isSpaceChar(c))
1809                                     break;
1810                                 stack.currentFrame->args.subjectPtr += length;
1811                             }
1812                             break;
1813                             
1814                             case OP_NOT_WORDCHAR:
1815                             for (i = min; i < stack.currentFrame->locals.max; i++) {
1816                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1817                                     break;
1818                                 int length;
1819                                 getCharAndLength(c, stack.currentFrame->args.subjectPtr, length);
1820                                 if (isWordChar(c))
1821                                     break;
1822                                 stack.currentFrame->args.subjectPtr += length;
1823                             }
1824                             break;
1825                             
1826                             case OP_WORDCHAR:
1827                             for (i = min; i < stack.currentFrame->locals.max; i++) {
1828                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1829                                     break;
1830                                 int length;
1831                                 getCharAndLength(c, stack.currentFrame->args.subjectPtr, length);
1832                                 if (!isWordChar(c))
1833                                     break;
1834                                 stack.currentFrame->args.subjectPtr += length;
1835                             }
1836                             break;
1837                             
1838                             default:
1839                             ASSERT_NOT_REACHED();
1840                             return matchError(JSRegExpErrorInternal, stack);
1841                     }
1842                     
1843                     /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
1844                     
1845                     for (;;) {
1846                         RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1847                         if (is_match)
1848                             RRETURN;
1849                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1850                             break;        /* Stop if tried at original pos */
1851                         BACKCHAR(stack.currentFrame->args.subjectPtr);
1852                     }
1853                     
1854                     /* Get here if we can't make it match with any permitted repetitions */
1855                     
1856                     RRETURN;
1857                 }
1858                 /* Control never reaches here */
1859                 
1860                 BEGIN_OPCODE(CRMINPLUS):
1861                 BEGIN_OPCODE(CRMINQUERY):
1862                 BEGIN_OPCODE(CRMINRANGE):
1863                 BEGIN_OPCODE(CRMINSTAR):
1864                 BEGIN_OPCODE(CRPLUS):
1865                 BEGIN_OPCODE(CRQUERY):
1866                 BEGIN_OPCODE(CRRANGE):
1867                 BEGIN_OPCODE(CRSTAR):
1868                 ASSERT_NOT_REACHED();
1869                 return matchError(JSRegExpErrorInternal, stack);
1870                 
1871 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1872             CAPTURING_BRACKET:
1873 #else
1874                 default:
1875 #endif
1876                 /* Opening capturing bracket. If there is space in the offset vector, save
1877                  the current subject position in the working slot at the top of the vector. We
1878                  mustn't change the current values of the data slot, because they may be set
1879                  from a previous iteration of this group, and be referred to by a reference
1880                  inside the group.
1881                  
1882                  If the bracket fails to match, we need to restore this value and also the
1883                  values of the final offsets, in case they were set by a previous iteration of
1884                  the same bracket.
1885                  
1886                  If there isn't enough space in the offset vector, treat this as if it were a
1887                  non-capturing bracket. Don't worry about setting the flag for the error case
1888                  here; that is handled in the code for KET. */
1889                 
1890                 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1891                 
1892                 stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
1893                 
1894                 /* For extended extraction brackets (large number), we have to fish out the
1895                  number from a dummy opcode at the start. */
1896                 
1897                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1898                     stack.currentFrame->locals.number = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 2+LINK_SIZE);
1899                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
1900                 
1901 #ifdef DEBUG
1902                 printf("start bracket %d subject=", stack.currentFrame->locals.number);
1903                 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
1904                 printf("\n");
1905 #endif
1906                 
1907                 if (stack.currentFrame->locals.offset < md.offset_max) {
1908                     stack.currentFrame->locals.save_offset1 = md.offset_vector[stack.currentFrame->locals.offset];
1909                     stack.currentFrame->locals.save_offset2 = md.offset_vector[stack.currentFrame->locals.offset + 1];
1910                     stack.currentFrame->locals.save_offset3 = md.offset_vector[md.offset_end - stack.currentFrame->locals.number];
1911                     
1912                     DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.save_offset1, stack.currentFrame->locals.save_offset2, stack.currentFrame->locals.save_offset3));
1913                     md.offset_vector[md.offset_end - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.start_subject;
1914                     
1915                     do {
1916                         RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
1917                         if (is_match)
1918                             RRETURN;
1919                         stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1920                     } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
1921                     
1922                     DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
1923                     
1924                     md.offset_vector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.save_offset1;
1925                     md.offset_vector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.save_offset2;
1926                     md.offset_vector[md.offset_end - stack.currentFrame->locals.number] = stack.currentFrame->locals.save_offset3;
1927                     
1928                     RRETURN;
1929                 }
1930                 
1931                 /* Insufficient room for saving captured contents */
1932                 
1933                 goto NON_CAPTURING_BRACKET;
1934         }
1935         
1936         /* Do not stick any code in here without much thought; it is assumed
1937          that "continue" in the code above comes out to here to repeat the main
1938          loop. */
1939         
1940     } /* End of main loop */
1941     
1942     ASSERT_NOT_REACHED();
1943     
1944 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1945     
1946 RRETURN_SWITCH:
1947     switch (stack.currentFrame->returnLocation)
1948     {
1949         case 0: goto RETURN;
1950         case 1: goto RRETURN_1;
1951         case 2: goto RRETURN_2;
1952         case 6: goto RRETURN_6;
1953         case 7: goto RRETURN_7;
1954         case 9: goto RRETURN_9;
1955         case 10: goto RRETURN_10;
1956         case 11: goto RRETURN_11;
1957         case 12: goto RRETURN_12;
1958         case 13: goto RRETURN_13;
1959         case 14: goto RRETURN_14;
1960         case 15: goto RRETURN_15;
1961         case 16: goto RRETURN_16;
1962         case 17: goto RRETURN_17;
1963         case 18: goto RRETURN_18;
1964         case 19: goto RRETURN_19;
1965         case 20: goto RRETURN_20;
1966         case 21: goto RRETURN_21;
1967         case 22: goto RRETURN_22;
1968         case 24: goto RRETURN_24;
1969         case 26: goto RRETURN_26;
1970         case 27: goto RRETURN_27;
1971         case 28: goto RRETURN_28;
1972         case 29: goto RRETURN_29;
1973         case 30: goto RRETURN_30;
1974         case 31: goto RRETURN_31;
1975         case 38: goto RRETURN_38;
1976         case 40: goto RRETURN_40;
1977         case 42: goto RRETURN_42;
1978         case 44: goto RRETURN_44;
1979         case 48: goto RRETURN_48;
1980         case 52: goto RRETURN_52;
1981     }
1982     
1983     ASSERT_NOT_REACHED();
1984     return matchError(JSRegExpErrorInternal, stack);
1985     
1986 #endif
1987     
1988 RETURN:
1989     return is_match ? MATCH_MATCH : MATCH_NOMATCH;
1990 }
1991
1992
1993 /*************************************************
1994 *         Execute a Regular Expression           *
1995 *************************************************/
1996
1997 /* This function applies a compiled re to a subject string and picks out
1998 portions of the string if it matches. Two elements in the vector are set for
1999 each substring: the offsets to the start and end of the substring.
2000
2001 Arguments:
2002   re              points to the compiled expression
2003   extra_data      points to extra data or is NULL
2004   subject         points to the subject string
2005   length          length of subject string (may contain binary zeros)
2006   start_offset    where to start in the subject string
2007   options         option bits
2008   offsets         points to a vector of ints to be filled in with offsets
2009   offsetcount     the number of elements in the vector
2010
2011 Returns:          > 0 => success; value is the number of elements filled in
2012                   = 0 => success, but offsets is not big enough
2013                    -1 => failed to match
2014                  < -1 => some kind of unexpected problem
2015 */
2016
2017 int jsRegExpExecute(const JSRegExp* re,
2018                     const UChar* subject, int length, int start_offset, int* offsets,
2019                     int offsetcount)
2020 {
2021     ASSERT(re);
2022     ASSERT(subject);
2023     ASSERT(offsetcount >= 0);
2024     ASSERT(offsets || offsetcount == 0);
2025     
2026     MatchData match_block;
2027     match_block.start_subject = (UChar*)subject;
2028     match_block.end_subject = match_block.start_subject + length;
2029     UChar* end_subject = match_block.end_subject;
2030     
2031     match_block.multiline = (re->options & PCRE_MULTILINE);
2032     match_block.ignoreCase = (re->options & OptionIgnoreCase);
2033     
2034     /* If the expression has got more back references than the offsets supplied can
2035      hold, we get a temporary chunk of working store to use during the matching.
2036      Otherwise, we can use the vector supplied, rounding down its size to a multiple
2037      of 3. */
2038     
2039     int ocount = offsetcount - (offsetcount % 3);
2040     
2041     bool using_temporary_offsets = false;
2042     if (re->top_backref > 0 && re->top_backref >= ocount/3) {
2043         ocount = re->top_backref * 3 + 3;
2044         match_block.offset_vector = new int[ocount];
2045         if (!match_block.offset_vector)
2046             return JSRegExpErrorNoMemory;
2047         using_temporary_offsets = true;
2048     } else
2049         match_block.offset_vector = offsets;
2050     
2051     match_block.offset_end = ocount;
2052     match_block.offset_max = (2*ocount)/3;
2053     match_block.offset_overflow = false;
2054     
2055     /* Compute the minimum number of offsets that we need to reset each time. Doing
2056      this makes a huge difference to execution time when there aren't many brackets
2057      in the pattern. */
2058     
2059     int resetcount = 2 + re->top_bracket * 2;
2060     if (resetcount > offsetcount)
2061         resetcount = ocount;
2062     
2063     /* Reset the working variable associated with each extraction. These should
2064      never be used unless previously set, but they get saved and restored, and so we
2065      initialize them to avoid reading uninitialized locations. */
2066     
2067     if (match_block.offset_vector) {
2068         int* iptr = match_block.offset_vector + ocount;
2069         int* iend = iptr - resetcount/2 + 1;
2070         while (--iptr >= iend)
2071             *iptr = -1;
2072     }
2073     
2074     /* Set up the first character to match, if available. The first_byte value is
2075      never set for an anchored regular expression, but the anchoring may be forced
2076      at run time, so we have to test for anchoring. The first char may be unset for
2077      an unanchored pattern, of course. If there's no first char and the pattern was
2078      studied, there may be a bitmap of possible first characters. */
2079     
2080     bool first_byte_caseless = false;
2081     int first_byte = -1;
2082     if (re->options & PCRE_FIRSTSET) {
2083         first_byte = re->first_byte & 255;
2084         if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
2085             first_byte = toLowerCase(first_byte);
2086     }
2087     
2088     /* For anchored or unanchored matches, there may be a "last known required
2089      character" set. */
2090     
2091     bool req_byte_caseless = false;
2092     int req_byte = -1;
2093     int req_byte2 = -1;
2094     if (re->options & PCRE_REQCHSET) {
2095         req_byte = re->req_byte & 255;
2096         req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
2097         req_byte2 = flipCase(req_byte);
2098     }
2099     
2100     /* Loop for handling unanchored repeated matching attempts; for anchored regexs
2101      the loop runs just once. */
2102     
2103     UChar* start_match = (UChar*)subject + start_offset;
2104     UChar* req_byte_ptr = start_match - 1;
2105     bool useMultiLineFirstCharOptimization = re->options & OptionUseMultiLineFirstCharOptimization;
2106     
2107     do {
2108         UChar* save_end_subject = end_subject;
2109         
2110         /* Reset the maximum number of extractions we might see. */
2111         
2112         if (match_block.offset_vector) {
2113             int* iptr = match_block.offset_vector;
2114             int* iend = iptr + resetcount;
2115             while (iptr < iend)
2116                 *iptr++ = -1;
2117         }
2118         
2119         /* Advance to a unique first char if possible. If firstline is true, the
2120          start of the match is constrained to the first line of a multiline string.
2121          Implement this by temporarily adjusting end_subject so that we stop scanning
2122          at a newline. If the match fails at the newline, later code breaks this loop.
2123          */
2124         
2125         /* Now test for a unique first byte */
2126         
2127         if (first_byte >= 0) {
2128             UChar first_char = first_byte;
2129             if (first_byte_caseless)
2130                 while (start_match < end_subject) {
2131                     int sm = *start_match;
2132                     if (sm > 127)
2133                         break;
2134                     if (toLowerCase(sm) == first_char)
2135                         break;
2136                     start_match++;
2137                 }
2138             else
2139                 while (start_match < end_subject && *start_match != first_char)
2140                     start_match++;
2141         }
2142         
2143         /* Or to just after \n for a multiline match if possible */
2144         else if (useMultiLineFirstCharOptimization) {
2145             if (start_match > match_block.start_subject + start_offset) {
2146                 while (start_match < end_subject && !isNewline(start_match[-1]))
2147                     start_match++;
2148             }
2149         }
2150         
2151         /* Restore fudged end_subject */
2152         
2153         end_subject = save_end_subject;
2154         
2155 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
2156         printf(">>>> Match against: ");
2157         pchars(start_match, end_subject - start_match, true, &match_block);
2158         printf("\n");
2159 #endif
2160         
2161         /* If req_byte is set, we know that that character must appear in the subject
2162          for the match to succeed. If the first character is set, req_byte must be
2163          later in the subject; otherwise the test starts at the match point. This
2164          optimization can save a huge amount of backtracking in patterns with nested
2165          unlimited repeats that aren't going to match. Writing separate code for
2166          cased/caseless versions makes it go faster, as does using an autoincrement
2167          and backing off on a match.
2168          
2169          HOWEVER: when the subject string is very, very long, searching to its end can
2170          take a long time, and give bad performance on quite ordinary patterns. This
2171          showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2172          don't do this when the string is sufficiently long.
2173          
2174          ALSO: this processing is disabled when partial matching is requested.
2175          */
2176         
2177         if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX) {
2178             UChar* p = start_match + ((first_byte >= 0)? 1 : 0);
2179             
2180             /* We don't need to repeat the search if we haven't yet reached the
2181              place we found it at last time. */
2182             
2183             if (p > req_byte_ptr) {
2184                 if (req_byte_caseless) {
2185                     while (p < end_subject) {
2186                         int pp = *p++;
2187                         if (pp == req_byte || pp == req_byte2) {
2188                             p--;
2189                             break;
2190                         }
2191                     }
2192                 } else {
2193                     while (p < end_subject) {
2194                         if (*p++ == req_byte) {
2195                             p--;
2196                             break;
2197                         }
2198                     }
2199                 }
2200                 
2201                 /* If we can't find the required character, break the matching loop */
2202                 
2203                 if (p >= end_subject)
2204                     break;
2205                 
2206                 /* If we have found the required character, save the point where we
2207                  found it, so that we don't search again next time round the loop if
2208                  the start hasn't passed this character yet. */
2209                 
2210                 req_byte_ptr = p;
2211             }
2212         }
2213         
2214         /* When a match occurs, substrings will be set for all internal extractions;
2215          we just need to set up the whole thing as substring 0 before returning. If
2216          there were too many extractions, set the return code to zero. In the case
2217          where we had to get some local store to hold offsets for backreferences, copy
2218          those back references that we can. In this case there need not be overflow
2219          if certain parts of the pattern were not used. */
2220         
2221         /* The code starts after the JSRegExp block and the capture name table. */
2222         const uschar* start_code = (const uschar*)(re + 1);
2223         
2224         int returnCode = match(start_match, start_code, 2, match_block);
2225         
2226         /* When the result is no match, if the subject's first character was a
2227          newline and the PCRE_FIRSTLINE option is set, break (which will return
2228          PCRE_ERROR_NOMATCH). The option requests that a match occur before the first
2229          newline in the subject. Otherwise, advance the pointer to the next character
2230          and continue - but the continuation will actually happen only when the
2231          pattern is not anchored. */
2232         
2233         if (returnCode == MATCH_NOMATCH) {
2234             start_match++;
2235             while(start_match < end_subject && isTrailingSurrogate(*start_match))
2236                 start_match++;
2237             continue;
2238         }
2239         
2240         if (returnCode != MATCH_MATCH) {
2241             DPRINTF((">>>> error: returning %d\n", rc));
2242             return returnCode;
2243         }
2244         
2245         /* We have a match! Copy the offset information from temporary store if
2246          necessary */
2247         
2248         if (using_temporary_offsets) {
2249             if (offsetcount >= 4) {
2250                 memcpy(offsets + 2, match_block.offset_vector + 2, (offsetcount - 2) * sizeof(int));
2251                 DPRINTF(("Copied offsets from temporary memory\n"));
2252             }
2253             if (match_block.end_offset_top > offsetcount)
2254                 match_block.offset_overflow = true;
2255             
2256             DPRINTF(("Freeing temporary memory\n"));
2257             delete [] match_block.offset_vector;
2258         }
2259         
2260         returnCode = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
2261         
2262         if (offsetcount < 2)
2263             returnCode = 0;
2264         else {
2265             offsets[0] = start_match - match_block.start_subject;
2266             offsets[1] = match_block.end_match_ptr - match_block.start_subject;
2267         }
2268         
2269         DPRINTF((">>>> returning %d\n", rc));
2270         return returnCode;
2271     } while (start_match <= end_subject);
2272     
2273     if (using_temporary_offsets) {
2274         DPRINTF(("Freeing temporary memory\n"));
2275         delete [] match_block.offset_vector;
2276     }
2277     
2278     DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2279     return JSRegExpErrorNoMatch;
2280 }