Windows build fix
[WebKit-https.git] / JavaScriptCore / pcre / pcre_exec.cpp
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7                  Originally written by Philip Hazel
8            Copyright (c) 1997-2006 University of Cambridge
9     Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10     Copyright (C) 2007 Eric Seidel <eric@webkit.org>
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
44
45 #include "config.h"
46
47 #include "pcre_internal.h"
48
49 #include <wtf/ASCIICType.h>
50 #include <wtf/Vector.h>
51
52 using namespace WTF;
53
54 #ifdef __GNUC__
55 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
56 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
57 #endif
58
59 /* Avoid warnings on Windows. */
60 #undef min
61 #undef max
62
63 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
64 typedef int ReturnLocation;
65 #else
66 typedef void* ReturnLocation;
67 #endif
68
69 struct MatchFrame {
70     ReturnLocation returnLocation;
71     struct MatchFrame* previousFrame;
72     
73     /* Function arguments that may change */
74     struct {
75         const UChar* subjectPtr;
76         const uschar* instructionPtr;
77         int offset_top;
78         const UChar* subpatternStart;
79     } args;
80     
81     
82     /* PCRE uses "fake" recursion built off of gotos, thus
83      stack-based local variables are not safe to use.  Instead we have to
84      store local variables on the current MatchFrame. */
85     struct {
86         const uschar* data;
87         const uschar* startOfRepeatingBracket;
88         const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
89         const uschar* instructionPtrAtStartOfOnce;
90         
91         int repeat_othercase;
92         
93         int ctype;
94         int fc;
95         int fi;
96         int length;
97         int max;
98         int number;
99         int offset;
100         int save_offset1;
101         int save_offset2;
102         int save_offset3;
103         
104         const UChar* subpatternStart;
105     } locals;
106 };
107
108 /* Structure for passing "static" information around between the functions
109 doing traditional NFA matching, so that they are thread-safe. */
110
111 struct MatchData {
112   int*   offset_vector;         /* Offset vector */
113   int    offset_end;            /* One past the end */
114   int    offset_max;            /* The maximum usable for return data */
115   bool   offset_overflow;       /* Set if too many extractions */
116   const UChar*  start_subject;         /* Start of the subject string */
117   const UChar*  end_subject;           /* End of the subject string */
118   const UChar*  end_match_ptr;         /* Subject position at end match */
119   int    end_offset_top;        /* Highwater mark at end of match */
120   bool   multiline;
121   bool   ignoreCase;
122 };
123
124 /* Non-error returns from the match() function. Error returns are externally
125 defined PCRE_ERROR_xxx codes, which are all negative. */
126
127 #define MATCH_MATCH        1
128 #define MATCH_NOMATCH      0
129
130 #ifdef DEBUG
131 /*************************************************
132 *        Debugging function to print chars       *
133 *************************************************/
134
135 /* Print a sequence of chars in printable format, stopping at the end of the
136 subject if the requested.
137
138 Arguments:
139   p           points to characters
140   length      number to print
141   is_subject  true if printing from within md.start_subject
142   md          pointer to matching data block, if is_subject is true
143 */
144
145 static void pchars(const UChar* p, int length, bool is_subject, const MatchData& md)
146 {
147     if (is_subject && length > md.end_subject - p)
148         length = md.end_subject - p;
149     while (length-- > 0) {
150         int c;
151         if (isprint(c = *(p++)))
152             printf("%c", c);
153         else if (c < 256)
154             printf("\\x%02x", c);
155         else
156             printf("\\x{%x}", c);
157     }
158 }
159 #endif
160
161
162
163 /*************************************************
164 *          Match a back-reference                *
165 *************************************************/
166
167 /* If a back reference hasn't been set, the length that is passed is greater
168 than the number of characters left in the string, so the match fails.
169
170 Arguments:
171   offset      index into the offset vector
172   subjectPtr        points into the subject
173   length      length to be matched
174   md          points to match data block
175
176 Returns:      true if matched
177 */
178
179 static bool match_ref(int offset, const UChar* subjectPtr, int length, const MatchData& md)
180 {
181     const UChar* p = md.start_subject + md.offset_vector[offset];
182     
183 #ifdef DEBUG
184     if (subjectPtr >= md.end_subject)
185         printf("matching subject <null>");
186     else {
187         printf("matching subject ");
188         pchars(subjectPtr, length, true, md);
189     }
190     printf(" against backref ");
191     pchars(p, length, false, md);
192     printf("\n");
193 #endif
194     
195     /* Always fail if not enough characters left */
196     
197     if (length > md.end_subject - subjectPtr)
198         return false;
199     
200     /* Separate the caselesss case for speed */
201     
202     if (md.ignoreCase) {
203         while (length-- > 0) {
204             UChar c = *p++;
205             int othercase = _pcre_ucp_othercase(c);
206             UChar d = *subjectPtr++;
207             if (c != d && othercase != d)
208                 return false;
209         }
210     }
211     else {
212         while (length-- > 0)
213             if (*p++ != *subjectPtr++)
214                 return false;
215     }
216     
217     return true;
218 }
219
220 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
221
222 /* Use numbered labels and switch statement at the bottom of the match function. */
223
224 #define RMATCH_WHERE(num) num
225 #define RRETURN_LABEL RRETURN_SWITCH
226
227 #else
228
229 /* Use GCC's computed goto extension. */
230
231 /* For one test case this is more than 40% faster than the switch statement.
232 We could avoid the use of the num argument entirely by using local labels,
233 but using it for the GCC case as well as the non-GCC case allows us to share
234 a bit more code and notice if we use conflicting numbers.*/
235
236 #define RMATCH_WHERE(num) &&RRETURN_##num
237 #define RRETURN_LABEL *stack.currentFrame->returnLocation
238
239 #endif
240
241 #define CHECK_RECURSION_LIMIT \
242     if (stack.size >= MATCH_LIMIT_RECURSION) \
243         return matchError(JSRegExpErrorRecursionLimit, stack);
244
245 #define RECURSE_WITH_RETURN_NUMBER(num) \
246     CHECK_RECURSION_LIMIT \
247     goto RECURSE;\
248     RRETURN_##num:
249
250 #define RECURSIVE_MATCH(num, ra, rb) \
251 {\
252     stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
253     RECURSE_WITH_RETURN_NUMBER(num) \
254     stack.popCurrentFrame(); \
255 }
256
257 #define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
258 {\
259     stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
260     startNewGroup(stack.currentFrame); \
261     RECURSE_WITH_RETURN_NUMBER(num) \
262     stack.popCurrentFrame(); \
263 }
264
265 #define RRETURN goto RRETURN_LABEL
266
267 #define RRETURN_NO_MATCH \
268   {\
269     is_match = false;\
270     RRETURN;\
271   }
272
273 /*************************************************
274 *         Match from current position            *
275 *************************************************/
276
277 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
278 in the subject string, while substringStart holds the value of subjectPtr at the start of the
279 last bracketed group - used for breaking infinite loops matching zero-length
280 strings. This function is called recursively in many circumstances. Whenever it
281 returns a negative (error) response, the outer match() call must also return the
282 same response.
283
284 Arguments:
285    subjectPtr        pointer in subject
286    instructionPtr       position in code
287    offset_top  current top pointer
288    md          pointer to "static" info for the match
289
290 Returns:       MATCH_MATCH if matched            )  these values are >= 0
291                MATCH_NOMATCH if failed to match  )
292                a negative PCRE_ERROR_xxx value if aborted by an error condition
293                  (e.g. stopped by repeated call or recursion limit)
294 */
295
296 static const unsigned FRAMES_ON_STACK = 16;
297
298 struct MatchStack {
299     MatchStack()
300         : framesEnd(frames + FRAMES_ON_STACK)
301         , currentFrame(frames)
302         , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
303     {
304         ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
305     }
306     
307     MatchFrame frames[FRAMES_ON_STACK];
308     MatchFrame* framesEnd;
309     MatchFrame* currentFrame;
310     unsigned size;
311     
312     inline bool canUseStackBufferForNextFrame()
313     {
314         return size < FRAMES_ON_STACK;
315     }
316     
317     inline MatchFrame* allocateNextFrame()
318     {
319         if (canUseStackBufferForNextFrame())
320             return currentFrame + 1;
321         return new MatchFrame;
322     }
323     
324     inline void pushNewFrame(const uschar* instructionPtr, const UChar* subpatternStart, ReturnLocation returnLocation)
325     {
326         MatchFrame* newframe = allocateNextFrame();
327         newframe->previousFrame = currentFrame;
328
329         newframe->args.subjectPtr = currentFrame->args.subjectPtr;
330         newframe->args.offset_top = currentFrame->args.offset_top;
331         newframe->args.instructionPtr = instructionPtr;
332         newframe->args.subpatternStart = subpatternStart;
333         newframe->returnLocation = returnLocation;
334         size++;
335
336         currentFrame = newframe;
337     }
338     
339     inline void popCurrentFrame()
340     {
341         MatchFrame* oldFrame = currentFrame;
342         currentFrame = currentFrame->previousFrame;
343         if (size > FRAMES_ON_STACK)
344             delete oldFrame;
345         size--;
346     }
347
348     void popAllFrames()
349     {
350         while (size)
351             popCurrentFrame();
352     }
353 };
354
355 static int matchError(int errorCode, MatchStack& stack)
356 {
357     stack.popAllFrames();
358     return errorCode;
359 }
360
361 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
362  if there are extra bytes. This is called when we know we are in UTF-8 mode. */
363
364 static inline void getUTF8CharAndIncrementLength(int& c, const uschar* subjectPtr, int& len)
365 {
366     c = *subjectPtr;
367     if ((c & 0xc0) == 0xc0) {
368         int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
369         int gcss = 6 * gcaa;
370         c = (c & _pcre_utf8_table3[gcaa]) << gcss;
371         for (int gcii = 1; gcii <= gcaa; gcii++) {
372             gcss -= 6;
373             c |= (subjectPtr[gcii] & 0x3f) << gcss;
374         }
375         len += gcaa;
376     }
377 }
378
379 static inline void startNewGroup(MatchFrame* currentFrame)
380 {
381     /* At the start of a bracketed group, add the current subject pointer to the
382      stack of such pointers, to be re-instated at the end of the group when we hit
383      the closing ket. When match() is called in other circumstances, we don't add to
384      this stack. */
385     
386     currentFrame->locals.subpatternStart = currentFrame->args.subpatternStart;
387 }
388
389 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
390 static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
391 {
392     // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
393     static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
394     static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
395
396     ASSERT(instructionOffset >= 0);
397     ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
398
399     minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
400     minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
401     maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
402 }
403
404 static int match(const UChar* subjectPtr, const uschar* instructionPtr, int offset_top, MatchData& md)
405 {
406     int is_match = false;
407     int min;
408     bool minimize = false; /* Initialization not really needed, but some compilers think so. */
409     
410     MatchStack stack;
411
412     /* The opcode jump table. */
413 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
414 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
415     static void* opcode_jump_table[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
416 #undef EMIT_JUMP_TABLE_ENTRY
417 #endif
418     
419     /* One-time setup of the opcode jump table. */
420 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
421     for (int i = 255; !opcode_jump_table[i]; i--)
422             opcode_jump_table[i] = &&CAPTURING_BRACKET;
423 #endif
424     
425 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
426     // Shark shows this as a hot line
427     // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
428     stack.currentFrame->returnLocation = &&RETURN;
429 #else
430     stack.currentFrame->returnLocation = 0;
431 #endif
432     stack.currentFrame->args.subjectPtr = subjectPtr;
433     stack.currentFrame->args.instructionPtr = instructionPtr;
434     stack.currentFrame->args.offset_top = offset_top;
435     stack.currentFrame->args.subpatternStart = 0;
436     startNewGroup(stack.currentFrame);
437     
438     /* This is where control jumps back to to effect "recursion" */
439     
440 RECURSE:
441
442     /* Now start processing the operations. */
443     
444 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
445     while (true)
446 #endif
447     {
448         
449 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
450 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
451 #define NEXT_OPCODE goto *opcode_jump_table[*stack.currentFrame->args.instructionPtr]
452 #else
453 #define BEGIN_OPCODE(opcode) case OP_##opcode
454 #define NEXT_OPCODE continue
455 #endif
456         
457 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
458         NEXT_OPCODE;
459 #else
460         switch (*stack.currentFrame->args.instructionPtr)
461 #endif
462         {
463                 /* Non-capturing bracket: optimized */
464                 
465                 BEGIN_OPCODE(BRA):
466             NON_CAPTURING_BRACKET:
467                 DPRINTF(("start bracket 0\n"));
468                 do {
469                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
470                     if (is_match)
471                         RRETURN;
472                     stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
473                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
474                 DPRINTF(("bracket 0 failed\n"));
475                 RRETURN;
476                 
477                 /* Skip over large extraction number data if encountered. */
478                 
479                 BEGIN_OPCODE(BRANUMBER):
480                 stack.currentFrame->args.instructionPtr += 3;
481                 NEXT_OPCODE;
482                 
483                 /* End of the pattern. */
484                 
485                 BEGIN_OPCODE(END):
486                 md.end_match_ptr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */
487                 md.end_offset_top = stack.currentFrame->args.offset_top;   /* and how many extracts were taken */
488                 is_match = true;
489                 RRETURN;
490                 
491                 /* Assertion brackets. Check the alternative branches in turn - the
492                  matching won't pass the KET for an assertion. If any one branch matches,
493                  the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
494                  start of each branch to move the current point backwards, so the code at
495                  this level is identical to the lookahead case. */
496                 
497                 BEGIN_OPCODE(ASSERT):
498                 do {
499                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
500                     if (is_match)
501                         break;
502                     stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
503                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
504                 if (*stack.currentFrame->args.instructionPtr == OP_KET)
505                     RRETURN_NO_MATCH;
506                 
507                 /* Continue from after the assertion, updating the offsets high water
508                  mark, since extracts may have been taken during the assertion. */
509                 
510                 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
511                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
512                 stack.currentFrame->args.offset_top = md.end_offset_top;
513                 NEXT_OPCODE;
514                 
515                 /* Negative assertion: all branches must fail to match */
516                 
517                 BEGIN_OPCODE(ASSERT_NOT):
518                 do {
519                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
520                     if (is_match)
521                         RRETURN_NO_MATCH;
522                     stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
523                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
524                 
525                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
526                 NEXT_OPCODE;
527                 
528                 /* "Once" brackets are like assertion brackets except that after a match,
529                  the point in the subject string is not moved back. Thus there can never be
530                  a move back into the brackets. Friedl calls these "atomic" subpatterns.
531                  Check the alternative branches in turn - the matching won't pass the KET
532                  for this kind of subpattern. If any one branch matches, we carry on as at
533                  the end of a normal bracket, leaving the subject pointer. */
534                 
535                 BEGIN_OPCODE(ONCE):
536                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr;
537                 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
538                 
539                 do {
540                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(9, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
541                     if (is_match)
542                         break;
543                     stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
544                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
545                 
546                 /* If hit the end of the group (which could be repeated), fail */
547                 
548                 if (*stack.currentFrame->args.instructionPtr != OP_ONCE && *stack.currentFrame->args.instructionPtr != OP_ALT)
549                     RRETURN;
550                 
551                 /* Continue as from after the assertion, updating the offsets high water
552                  mark, since extracts may have been taken. */
553                 
554                 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
555                 
556                 stack.currentFrame->args.offset_top = md.end_offset_top;
557                 stack.currentFrame->args.subjectPtr = md.end_match_ptr;
558                 
559                 /* For a non-repeating ket, just continue at this level. This also
560                  happens for a repeating ket if no characters were matched in the group.
561                  This is the forcible breaking of infinite loops as implemented in Perl
562                  5.005. If there is an options reset, it will get obeyed in the normal
563                  course of events. */
564                 
565                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
566                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
567                     NEXT_OPCODE;
568                 }
569                 
570                 /* The repeating kets try the rest of the pattern or restart from the
571                  preceding bracket, in the appropriate order. We need to reset any options
572                  that changed within the bracket before re-running it, so check the next
573                  opcode. */
574                 
575                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
576                     RECURSIVE_MATCH(10, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
577                     if (is_match)
578                         RRETURN;
579                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(11, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
580                     if (is_match)
581                         RRETURN;
582                 } else { /* OP_KETRMAX */
583                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(12, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
584                     if (is_match)
585                         RRETURN;
586                     RECURSIVE_MATCH(13, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
587                     if (is_match)
588                         RRETURN;
589                 }
590                 RRETURN;
591                 
592                 /* An alternation is the end of a branch; scan along to find the end of the
593                  bracketed group and go to there. */
594                 
595                 BEGIN_OPCODE(ALT):
596                 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
597                 NEXT_OPCODE;
598                 
599                 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
600                  that it may occur zero times. It may repeat infinitely, or not at all -
601                  i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
602                  repeat limits are compiled as a number of copies, with the optional ones
603                  preceded by BRAZERO or BRAMINZERO. */
604                 
605                 BEGIN_OPCODE(BRAZERO):
606                 {
607                     stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
608                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.subpatternStart);
609                     if (is_match)
610                         RRETURN;
611                     moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->locals.startOfRepeatingBracket);
612                     stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
613                 }
614                 NEXT_OPCODE;
615                 
616                 BEGIN_OPCODE(BRAMINZERO):
617                 {
618                     stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
619                     moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->locals.startOfRepeatingBracket);
620                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
621                     if (is_match)
622                         RRETURN;
623                     stack.currentFrame->args.instructionPtr++;
624                 }
625                 NEXT_OPCODE;
626                 
627                 /* End of a group, repeated or non-repeating. If we are at the end of
628                  an assertion "group", stop matching and return MATCH_MATCH, but record the
629                  current high water mark for use by positive assertions. Do this also
630                  for the "once" (not-backup up) groups. */
631                 
632                 BEGIN_OPCODE(KET):
633                 BEGIN_OPCODE(KETRMIN):
634                 BEGIN_OPCODE(KETRMAX):
635                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
636                 stack.currentFrame->args.subpatternStart = stack.currentFrame->locals.subpatternStart;
637                 stack.currentFrame->locals.subpatternStart = stack.currentFrame->previousFrame->args.subpatternStart;
638
639                 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ONCE) {
640                     md.end_match_ptr = stack.currentFrame->args.subjectPtr;      /* For ONCE */
641                     md.end_offset_top = stack.currentFrame->args.offset_top;
642                     is_match = true;
643                     RRETURN;
644                 }
645                 
646                 /* In all other cases except a conditional group we have to check the
647                  group number back at the start and if necessary complete handling an
648                  extraction by setting the offsets and bumping the high water mark. */
649                 
650                 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
651                 
652                 /* For extended extraction brackets (large number), we have to fish out
653                  the number from a dummy opcode at the start. */
654                 
655                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
656                     stack.currentFrame->locals.number = get2ByteOpcodeValueAtOffset(stack.currentFrame->locals.instructionPtrAtStartOfOnce, 2+LINK_SIZE);
657                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
658                 
659 #ifdef DEBUG
660                 printf("end bracket %d", stack.currentFrame->locals.number);
661                 printf("\n");
662 #endif
663                 
664                 /* Test for a numbered group. This includes groups called as a result
665                  of recursion. Note that whole-pattern recursion is coded as a recurse
666                  into group 0, so it won't be picked up here. Instead, we catch it when
667                  the OP_END is reached. */
668                 
669                 if (stack.currentFrame->locals.number > 0) {
670                     if (stack.currentFrame->locals.offset >= md.offset_max)
671                         md.offset_overflow = true;
672                     else {
673                         md.offset_vector[stack.currentFrame->locals.offset] =
674                         md.offset_vector[md.offset_end - stack.currentFrame->locals.number];
675                         md.offset_vector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.start_subject;
676                         if (stack.currentFrame->args.offset_top <= stack.currentFrame->locals.offset)
677                             stack.currentFrame->args.offset_top = stack.currentFrame->locals.offset + 2;
678                     }
679                 }
680                 
681                 /* For a non-repeating ket, just continue at this level. This also
682                  happens for a repeating ket if no characters were matched in the group.
683                  This is the forcible breaking of infinite loops as implemented in Perl
684                  5.005. If there is an options reset, it will get obeyed in the normal
685                  course of events. */
686                 
687                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
688                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
689                     NEXT_OPCODE;
690                 }
691                 
692                 /* The repeating kets try the rest of the pattern or restart from the
693                  preceding bracket, in the appropriate order. */
694                 
695                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
696                     RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
697                     if (is_match)
698                         RRETURN;
699                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
700                     if (is_match)
701                         RRETURN;
702                 } else { /* OP_KETRMAX */
703                     RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
704                     if (is_match)
705                         RRETURN;
706                     RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
707                     if (is_match)
708                         RRETURN;
709                 }
710                 RRETURN;
711                 
712                 /* Start of subject, or after internal newline if multiline. */
713                 
714                 BEGIN_OPCODE(CIRC):
715                 if (stack.currentFrame->args.subjectPtr != md.start_subject && (!md.multiline || !isNewline(stack.currentFrame->args.subjectPtr[-1])))
716                     RRETURN_NO_MATCH;
717                 stack.currentFrame->args.instructionPtr++;
718                 NEXT_OPCODE;
719                 
720                 /* End of subject, or before internal newline if multiline. */
721                 
722                 BEGIN_OPCODE(DOLL):
723                 if (stack.currentFrame->args.subjectPtr < md.end_subject && (!md.multiline || !isNewline(*stack.currentFrame->args.subjectPtr)))
724                     RRETURN_NO_MATCH;
725                 stack.currentFrame->args.instructionPtr++;
726                 NEXT_OPCODE;
727                 
728                 /* Word boundary assertions */
729                 
730                 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
731                 BEGIN_OPCODE(WORD_BOUNDARY):
732             {
733                 bool currentCharIsWordChar = false;
734                 bool previousCharIsWordChar = false;
735                 
736                 if (stack.currentFrame->args.subjectPtr > md.start_subject)
737                     previousCharIsWordChar = isWordChar(getPreviousChar(stack.currentFrame->args.subjectPtr));
738                 if (stack.currentFrame->args.subjectPtr < md.end_subject)
739                     currentCharIsWordChar = isWordChar(getChar(stack.currentFrame->args.subjectPtr));
740                 
741                 /* Now see if the situation is what we want */
742                 bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
743                 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
744                     RRETURN_NO_MATCH;
745                 NEXT_OPCODE;
746             }
747                 
748                 /* Match a single character type; inline for speed */
749                 
750                 BEGIN_OPCODE(ANY_CHAR):
751                 if (stack.currentFrame->args.subjectPtr < md.end_subject && isNewline(*stack.currentFrame->args.subjectPtr))
752                     RRETURN_NO_MATCH;
753                 if (!movePtrToNextChar(stack.currentFrame->args.subjectPtr, md.end_subject))
754                     RRETURN_NO_MATCH;
755                 stack.currentFrame->args.instructionPtr++;
756                 NEXT_OPCODE;
757                 
758                 BEGIN_OPCODE(NOT_DIGIT):
759             {
760                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
761                     RRETURN_NO_MATCH;
762                 int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
763                 if (isASCIIDigit(c))
764                     RRETURN_NO_MATCH;
765                 stack.currentFrame->args.instructionPtr++;
766                 NEXT_OPCODE;
767             }    
768                 BEGIN_OPCODE(DIGIT):
769             {
770                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
771                     RRETURN_NO_MATCH;
772                 int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
773                 if (!isASCIIDigit(c))
774                     RRETURN_NO_MATCH;
775                 stack.currentFrame->args.instructionPtr++;
776                 NEXT_OPCODE;
777             }
778                 
779                 BEGIN_OPCODE(NOT_WHITESPACE):
780             {
781                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
782                     RRETURN_NO_MATCH;
783                 int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
784                 if (isSpaceChar(c))
785                     RRETURN_NO_MATCH;
786                 stack.currentFrame->args.instructionPtr++;
787                 NEXT_OPCODE;
788             }
789                 
790                 BEGIN_OPCODE(WHITESPACE):
791             {
792                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
793                     RRETURN_NO_MATCH;
794                 int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
795                 if (!isSpaceChar(c))
796                     RRETURN_NO_MATCH;
797                 stack.currentFrame->args.instructionPtr++;
798                 NEXT_OPCODE;
799             }
800                 
801                 BEGIN_OPCODE(NOT_WORDCHAR):
802             {
803                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
804                     RRETURN_NO_MATCH;
805                 int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
806                 if (isWordChar(c))
807                     RRETURN_NO_MATCH;
808                 stack.currentFrame->args.instructionPtr++;
809                 NEXT_OPCODE;
810             }
811                 
812                 BEGIN_OPCODE(WORDCHAR):
813             {
814                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
815                     RRETURN_NO_MATCH;
816                 int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
817                 if (!isWordChar(c))
818                     RRETURN_NO_MATCH;
819                 stack.currentFrame->args.instructionPtr++;
820                 NEXT_OPCODE;
821             }
822                 
823                 /* Match a back reference, possibly repeatedly. Look past the end of the
824                  item to see if there is repeat information following. The code is similar
825                  to that for character classes, but repeated for efficiency. Then obey
826                  similar code to character type repeats - written out again for speed.
827                  However, if the referenced string is the empty string, always treat
828                  it as matched, any number of times (otherwise there could be infinite
829                  loops). */
830                 
831                 BEGIN_OPCODE(REF):
832                 stack.currentFrame->locals.offset = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1) << 1;               /* Doubled ref number */
833                 stack.currentFrame->args.instructionPtr += 3;                                 /* Advance past item */
834                 
835                 /* If the reference is unset, set the length to be longer than the amount
836                  of subject left; this ensures that every attempt at a match fails. We
837                  can't just fail here, because of the possibility of quantifiers with zero
838                  minima. */
839                 
840                 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offset_top || md.offset_vector[stack.currentFrame->locals.offset] < 0)
841                     stack.currentFrame->locals.length = 0;
842                 else
843                     stack.currentFrame->locals.length = md.offset_vector[stack.currentFrame->locals.offset+1] - md.offset_vector[stack.currentFrame->locals.offset];
844                 
845                 /* Set up for repetition, or handle the non-repeated case */
846                 
847                 switch (*stack.currentFrame->args.instructionPtr) {
848                 case OP_CRSTAR:
849                 case OP_CRMINSTAR:
850                 case OP_CRPLUS:
851                 case OP_CRMINPLUS:
852                 case OP_CRQUERY:
853                 case OP_CRMINQUERY:
854                     repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
855                     break;
856                     
857                 case OP_CRRANGE:
858                 case OP_CRMINRANGE:
859                     minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
860                     min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
861                     stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
862                     if (stack.currentFrame->locals.max == 0)
863                         stack.currentFrame->locals.max = INT_MAX;
864                     stack.currentFrame->args.instructionPtr += 5;
865                     break;
866                 
867                 default:               /* No repeat follows */
868                     if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
869                         RRETURN_NO_MATCH;
870                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
871                     NEXT_OPCODE;
872                 }
873                 
874                 /* If the length of the reference is zero, just continue with the
875                  main loop. */
876                 
877                 if (stack.currentFrame->locals.length == 0)
878                     NEXT_OPCODE;
879                 
880                 /* First, ensure the minimum number of matches are present. */
881                 
882                 for (int i = 1; i <= min; i++) {
883                     if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
884                         RRETURN_NO_MATCH;
885                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
886                 }
887                 
888                 /* If min = max, continue at the same level without recursion.
889                  They are not both allowed to be zero. */
890                 
891                 if (min == stack.currentFrame->locals.max)
892                     NEXT_OPCODE;
893                 
894                 /* If minimizing, keep trying and advancing the pointer */
895                 
896                 if (minimize) {
897                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
898                         RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
899                         if (is_match)
900                             RRETURN;
901                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
902                             RRETURN;
903                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
904                     }
905                     /* Control never reaches here */
906                 }
907                 
908                 /* If maximizing, find the longest string and work backwards */
909                 
910                 else {
911                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
912                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
913                         if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
914                             break;
915                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
916                     }
917                     while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
918                         RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
919                         if (is_match)
920                             RRETURN;
921                         stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
922                     }
923                     RRETURN_NO_MATCH;
924                 }
925                 /* Control never reaches here */
926                 
927                 /* Match a bit-mapped character class, possibly repeatedly. This op code is
928                  used when all the characters in the class have values in the range 0-255,
929                  and either the matching is caseful, or the characters are in the range
930                  0-127 when UTF-8 processing is enabled. The only difference between
931                  OP_CLASS and OP_NCLASS occurs when a data character outside the range is
932                  encountered.
933                  
934                  First, look past the end of the item to see if there is repeat information
935                  following. Then obey similar code to character type repeats - written out
936                  again for speed. */
937                 
938                 BEGIN_OPCODE(NCLASS):
939                 BEGIN_OPCODE(CLASS):
940                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1;                /* Save for matching */
941                 stack.currentFrame->args.instructionPtr += 33;                     /* Advance past the item */
942                 
943                 switch (*stack.currentFrame->args.instructionPtr) {
944                 case OP_CRSTAR:
945                 case OP_CRMINSTAR:
946                 case OP_CRPLUS:
947                 case OP_CRMINPLUS:
948                 case OP_CRQUERY:
949                 case OP_CRMINQUERY:
950                     repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
951                     break;
952                     
953                 case OP_CRRANGE:
954                 case OP_CRMINRANGE:
955                     minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
956                     min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
957                     stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
958                     if (stack.currentFrame->locals.max == 0)
959                         stack.currentFrame->locals.max = INT_MAX;
960                     stack.currentFrame->args.instructionPtr += 5;
961                     break;
962                     
963                 default:               /* No repeat follows */
964                     min = stack.currentFrame->locals.max = 1;
965                     break;
966                 }
967                 
968                 /* First, ensure the minimum number of matches are present. */
969                 
970                 for (int i = 1; i <= min; i++) {
971                     if (stack.currentFrame->args.subjectPtr >= md.end_subject)
972                         RRETURN_NO_MATCH;
973                     int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
974                     if (c > 255) {
975                         if (stack.currentFrame->locals.data[-1] == OP_CLASS)
976                             RRETURN_NO_MATCH;
977                     } else {
978                         if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
979                             RRETURN_NO_MATCH;
980                     }
981                 }
982                 
983                 /* If max == min we can continue with the main loop without the
984                  need to recurse. */
985                 
986                 if (min == stack.currentFrame->locals.max)
987                     NEXT_OPCODE;      
988                 
989                 /* If minimizing, keep testing the rest of the expression and advancing
990                  the pointer while it matches the class. */
991                 if (minimize) {
992                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
993                         RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
994                         if (is_match)
995                             RRETURN;
996                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
997                             RRETURN;
998                         int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
999                         if (c > 255) {
1000                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1001                                 RRETURN;
1002                         } else {
1003                             if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
1004                                 RRETURN;
1005                         }
1006                     }
1007                     /* Control never reaches here */
1008                 }
1009                 /* If maximizing, find the longest possible run, then work backwards. */
1010                 else {
1011                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1012                     
1013                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
1014                         if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1015                             break;
1016                         int length;
1017                         int c = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1018                         if (c > 255) {
1019                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1020                                 break;
1021                         } else {
1022                             if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
1023                                 break;
1024                         }
1025                         stack.currentFrame->args.subjectPtr += length;
1026                     }
1027                     for (;;) {
1028                         RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1029                         if (is_match)
1030                             RRETURN;
1031                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1032                             break;        /* Stop if tried at original pos */
1033                         movePtrToStartOfCurrentChar(stack.currentFrame->args.subjectPtr);
1034                     }
1035                     
1036                     RRETURN;
1037                 }
1038                 /* Control never reaches here */
1039                 
1040                 /* Match an extended character class. This opcode is encountered only
1041                  in UTF-8 mode, because that's the only time it is compiled. */
1042                 
1043                 BEGIN_OPCODE(XCLASS):
1044                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE;                /* Save for matching */
1045                 stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);                      /* Advance past the item */
1046                 
1047                 switch (*stack.currentFrame->args.instructionPtr) {
1048                 case OP_CRSTAR:
1049                 case OP_CRMINSTAR:
1050                 case OP_CRPLUS:
1051                 case OP_CRMINPLUS:
1052                 case OP_CRQUERY:
1053                 case OP_CRMINQUERY:
1054                     repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
1055                     break;
1056                     
1057                 case OP_CRRANGE:
1058                 case OP_CRMINRANGE:
1059                     minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
1060                     min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1061                     stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
1062                     if (stack.currentFrame->locals.max == 0)
1063                         stack.currentFrame->locals.max = INT_MAX;
1064                     stack.currentFrame->args.instructionPtr += 5;
1065                     break;
1066                     
1067                 default:               /* No repeat follows */
1068                     min = stack.currentFrame->locals.max = 1;
1069             }
1070                 
1071                 /* First, ensure the minimum number of matches are present. */
1072                 
1073                 for (int i = 1; i <= min; i++) {
1074                     if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1075                         RRETURN_NO_MATCH;
1076                     int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1077                     if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1078                         RRETURN_NO_MATCH;
1079                 }
1080                 
1081                 /* If max == min we can continue with the main loop without the
1082                  need to recurse. */
1083                 
1084                 if (min == stack.currentFrame->locals.max)
1085                     NEXT_OPCODE;
1086                 
1087                 /* If minimizing, keep testing the rest of the expression and advancing
1088                  the pointer while it matches the class. */
1089                 
1090                 if (minimize) {
1091                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1092                         RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1093                         if (is_match)
1094                             RRETURN;
1095                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1096                             RRETURN;
1097                         int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1098                         if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1099                             RRETURN;
1100                     }
1101                     /* Control never reaches here */
1102                 }
1103                 
1104                 /* If maximizing, find the longest possible run, then work backwards. */
1105                 
1106                 else {
1107                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1108                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
1109                         if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1110                             break;
1111                         int length;
1112                         int c = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1113                         if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1114                             break;
1115                         stack.currentFrame->args.subjectPtr += length;
1116                     }
1117                     for(;;) {
1118                         RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1119                         if (is_match)
1120                             RRETURN;
1121                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1122                             break;        /* Stop if tried at original pos */
1123                         movePtrToStartOfCurrentChar(stack.currentFrame->args.subjectPtr);
1124                     }
1125                     RRETURN;
1126                 }
1127                 
1128                 /* Control never reaches here */
1129                 
1130                 /* Match a single character, casefully */
1131                 
1132                 BEGIN_OPCODE(CHAR):
1133                 stack.currentFrame->locals.length = 1;
1134                 stack.currentFrame->args.instructionPtr++;
1135                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1136             {
1137                 int dc;
1138                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1139                 switch (md.end_subject - stack.currentFrame->args.subjectPtr) {
1140                 case 0:
1141                     RRETURN_NO_MATCH;
1142                 case 1:
1143                     dc = *stack.currentFrame->args.subjectPtr++;
1144                     if (isLeadingSurrogate(dc))
1145                         RRETURN_NO_MATCH;
1146                     break;
1147                 default:
1148                     dc = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1149                 }
1150                 if (stack.currentFrame->locals.fc != dc)
1151                     RRETURN_NO_MATCH;
1152             }
1153                 NEXT_OPCODE;
1154                 
1155                 /* Match a single character, caselessly */
1156                 
1157                 BEGIN_OPCODE(CHAR_IGNORING_CASE):
1158                 stack.currentFrame->locals.length = 1;
1159                 stack.currentFrame->args.instructionPtr++;
1160                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1161                 
1162                 if (md.end_subject - stack.currentFrame->args.subjectPtr == 0)
1163                     RRETURN_NO_MATCH;
1164                 
1165             {
1166                 int dc;
1167                 if (md.end_subject - stack.currentFrame->args.subjectPtr == 1) {
1168                     dc = *stack.currentFrame->args.subjectPtr++;
1169                     if (isLeadingSurrogate(dc))
1170                         RRETURN_NO_MATCH;
1171                 } else
1172                     dc = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1173                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1174                 
1175                 /* If we have Unicode property support, we can use it to test the other
1176                  case of the character, if there is one. */
1177                 
1178                 if (stack.currentFrame->locals.fc != dc) {
1179                     if (dc != _pcre_ucp_othercase(stack.currentFrame->locals.fc))
1180                         RRETURN_NO_MATCH;
1181                 }
1182             }
1183                 NEXT_OPCODE;
1184                 
1185                 /* Match a single ASCII character. */
1186                 
1187                 BEGIN_OPCODE(ASCII_CHAR):
1188                 if (md.end_subject == stack.currentFrame->args.subjectPtr)
1189                     RRETURN_NO_MATCH;
1190                 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
1191                     RRETURN_NO_MATCH;
1192                 ++stack.currentFrame->args.subjectPtr;
1193                 stack.currentFrame->args.instructionPtr += 2;
1194                 NEXT_OPCODE;
1195                 
1196                 /* Match one of two cases of an ASCII character. */
1197                 
1198                 BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
1199                 if (md.end_subject == stack.currentFrame->args.subjectPtr)
1200                     RRETURN_NO_MATCH;
1201                 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
1202                     RRETURN_NO_MATCH;
1203                 ++stack.currentFrame->args.subjectPtr;
1204                 stack.currentFrame->args.instructionPtr += 2;
1205                 NEXT_OPCODE;
1206                 
1207                 /* Match a single character repeatedly; different opcodes share code. */
1208                 
1209                 BEGIN_OPCODE(EXACT):
1210                 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1211                 minimize = false;
1212                 stack.currentFrame->args.instructionPtr += 3;
1213                 goto REPEATCHAR;
1214                 
1215                 BEGIN_OPCODE(UPTO):
1216                 BEGIN_OPCODE(MINUPTO):
1217                 min = 0;
1218                 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1219                 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
1220                 stack.currentFrame->args.instructionPtr += 3;
1221                 goto REPEATCHAR;
1222                 
1223                 BEGIN_OPCODE(STAR):
1224                 BEGIN_OPCODE(MINSTAR):
1225                 BEGIN_OPCODE(PLUS):
1226                 BEGIN_OPCODE(MINPLUS):
1227                 BEGIN_OPCODE(QUERY):
1228                 BEGIN_OPCODE(MINQUERY):
1229                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
1230                 
1231                 /* Common code for all repeated single-character matches. We can give
1232                  up quickly if there are fewer than the minimum number of characters left in
1233                  the subject. */
1234                 
1235             REPEATCHAR:
1236                 
1237                 stack.currentFrame->locals.length = 1;
1238                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1239                 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.end_subject - stack.currentFrame->args.subjectPtr)
1240                     RRETURN_NO_MATCH;
1241                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1242                 
1243                 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1244                     int othercase = md.ignoreCase ? _pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
1245                     
1246                     for (int i = 1; i <= min; i++) {
1247                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1248                             RRETURN_NO_MATCH;
1249                         ++stack.currentFrame->args.subjectPtr;
1250                     }
1251                     
1252                     if (min == stack.currentFrame->locals.max)
1253                         NEXT_OPCODE;
1254                     
1255                     if (minimize) {
1256                         stack.currentFrame->locals.repeat_othercase = othercase;
1257                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1258                             RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1259                             if (is_match)
1260                                 RRETURN;
1261                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1262                                 RRETURN;
1263                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeat_othercase)
1264                                 RRETURN;
1265                             ++stack.currentFrame->args.subjectPtr;
1266                         }
1267                         /* Control never reaches here */
1268                     } else {
1269                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1270                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1271                             if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1272                                 break;
1273                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1274                                 break;
1275                             ++stack.currentFrame->args.subjectPtr;
1276                         }
1277                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1278                             RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1279                             if (is_match)
1280                                 RRETURN;
1281                             --stack.currentFrame->args.subjectPtr;
1282                         }
1283                         RRETURN_NO_MATCH;
1284                     }
1285                     /* Control never reaches here */
1286                 } else {
1287                     /* No case on surrogate pairs, so no need to bother with "othercase". */
1288                     
1289                     for (int i = 1; i <= min; i++) {
1290                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1291                             RRETURN_NO_MATCH;
1292                         stack.currentFrame->args.subjectPtr += 2;
1293                     }
1294                     
1295                     if (min == stack.currentFrame->locals.max)
1296                         NEXT_OPCODE;
1297                     
1298                     if (minimize) {
1299                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1300                             RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1301                             if (is_match)
1302                                 RRETURN;
1303                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1304                                 RRETURN;
1305                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1306                                 RRETURN;
1307                             stack.currentFrame->args.subjectPtr += 2;
1308                         }
1309                         /* Control never reaches here */
1310                     } else {
1311                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1312                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1313                             if (stack.currentFrame->args.subjectPtr > md.end_subject - 2)
1314                                 break;
1315                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1316                                 break;
1317                             stack.currentFrame->args.subjectPtr += 2;
1318                         }
1319                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1320                             RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1321                             if (is_match)
1322                                 RRETURN;
1323                             stack.currentFrame->args.subjectPtr -= 2;
1324                         }
1325                         RRETURN_NO_MATCH;
1326                     }
1327                     /* Control never reaches here */
1328                 }
1329                 /* Control never reaches here */
1330                 
1331                 /* Match a negated single one-byte character. The character we are
1332                  checking can be multibyte. */
1333                 
1334                 BEGIN_OPCODE(NOT):
1335             {
1336                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1337                     RRETURN_NO_MATCH;
1338                 stack.currentFrame->args.instructionPtr++;
1339                 int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1340                 if (md.ignoreCase) {
1341                     if (c < 128)
1342                         c = toLowerCase(c);
1343                     if (toLowerCase(*stack.currentFrame->args.instructionPtr++) == c)
1344                         RRETURN_NO_MATCH;
1345                 } else {
1346                     if (*stack.currentFrame->args.instructionPtr++ == c)
1347                         RRETURN_NO_MATCH;
1348                 }
1349                 NEXT_OPCODE;
1350             }
1351                 
1352                 /* Match a negated single one-byte character repeatedly. This is almost a
1353                  repeat of the code for a repeated single character, but I haven't found a
1354                  nice way of commoning these up that doesn't require a test of the
1355                  positive/negative option for each character match. Maybe that wouldn't add
1356                  very much to the time taken, but character matching *is* what this is all
1357                  about... */
1358                 
1359                 BEGIN_OPCODE(NOTEXACT):
1360                 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1361                 minimize = false;
1362                 stack.currentFrame->args.instructionPtr += 3;
1363                 goto REPEATNOTCHAR;
1364                 
1365                 BEGIN_OPCODE(NOTUPTO):
1366                 BEGIN_OPCODE(NOTMINUPTO):
1367                 min = 0;
1368                 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1369                 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
1370                 stack.currentFrame->args.instructionPtr += 3;
1371                 goto REPEATNOTCHAR;
1372                 
1373                 BEGIN_OPCODE(NOTSTAR):
1374                 BEGIN_OPCODE(NOTMINSTAR):
1375                 BEGIN_OPCODE(NOTPLUS):
1376                 BEGIN_OPCODE(NOTMINPLUS):
1377                 BEGIN_OPCODE(NOTQUERY):
1378                 BEGIN_OPCODE(NOTMINQUERY):
1379                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
1380                 
1381                 /* Common code for all repeated single-byte matches. We can give up quickly
1382                  if there are fewer than the minimum number of bytes left in the
1383                  subject. */
1384                 
1385             REPEATNOTCHAR:
1386                 if (min > md.end_subject - stack.currentFrame->args.subjectPtr)
1387                     RRETURN_NO_MATCH;
1388                 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
1389                 
1390                 /* The code is duplicated for the caseless and caseful cases, for speed,
1391                  since matching characters is likely to be quite common. First, ensure the
1392                  minimum number of matches are present. If min = max, continue at the same
1393                  level without recursing. Otherwise, if minimizing, keep trying the rest of
1394                  the expression and advancing one matching character if failing, up to the
1395                  maximum. Alternatively, if maximizing, find the maximum number of
1396                  characters and work backwards. */
1397                 
1398                 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
1399                 
1400                 if (md.ignoreCase) {
1401                     if (stack.currentFrame->locals.fc < 128)
1402                         stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
1403                     
1404                     {
1405                         for (int i = 1; i <= min; i++) {
1406                             int d = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1407                             if (d < 128)
1408                                 d = toLowerCase(d);
1409                             if (stack.currentFrame->locals.fc == d)
1410                                 RRETURN_NO_MATCH;
1411                         }
1412                     }
1413                     
1414                     if (min == stack.currentFrame->locals.max)
1415                         NEXT_OPCODE;      
1416                     
1417                     if (minimize) {
1418                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1419                             RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1420                             if (is_match)
1421                                 RRETURN;
1422                             int d = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1423                             if (d < 128)
1424                                 d = toLowerCase(d);
1425                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject || stack.currentFrame->locals.fc == d)
1426                                 RRETURN;
1427                         }
1428                         /* Control never reaches here */
1429                     }
1430                     
1431                     /* Maximize case */
1432                     
1433                     else {
1434                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1435                         
1436                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1437                             if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1438                                 break;
1439                             int length;
1440                             int d = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1441                             if (d < 128)
1442                                 d = toLowerCase(d);
1443                             if (stack.currentFrame->locals.fc == d)
1444                                 break;
1445                             stack.currentFrame->args.subjectPtr += length;
1446                         }
1447                         for (;;) {
1448                             RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1449                             if (is_match)
1450                                 RRETURN;
1451                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1452                                 break;        /* Stop if tried at original pos */
1453                             movePtrToStartOfCurrentChar(stack.currentFrame->args.subjectPtr);
1454                         }
1455                         
1456                         RRETURN;
1457                     }
1458                     /* Control never reaches here */
1459                 }
1460                 
1461                 /* Caseful comparisons */
1462                 
1463                 else {
1464                     for (int i = 1; i <= min; i++) {
1465                         int d = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1466                         if (stack.currentFrame->locals.fc == d)
1467                             RRETURN_NO_MATCH;
1468                     }
1469
1470                     if (min == stack.currentFrame->locals.max)
1471                         NEXT_OPCODE;
1472                     
1473                     if (minimize) {
1474                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1475                             RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1476                             if (is_match)
1477                                 RRETURN;
1478                             int d = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1479                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject || stack.currentFrame->locals.fc == d)
1480                                 RRETURN;
1481                         }
1482                         /* Control never reaches here */
1483                     }
1484                     
1485                     /* Maximize case */
1486                     
1487                     else {
1488                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1489                         
1490                         {
1491                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1492                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1493                                     break;
1494                                 int length;
1495                                 int d = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1496                                 if (stack.currentFrame->locals.fc == d)
1497                                     break;
1498                                 stack.currentFrame->args.subjectPtr += length;
1499                             }
1500                             for (;;) {
1501                                 RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1502                                 if (is_match)
1503                                     RRETURN;
1504                                 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1505                                     break;        /* Stop if tried at original pos */
1506                                 movePtrToStartOfCurrentChar(stack.currentFrame->args.subjectPtr);
1507                             }
1508                         }
1509                         
1510                         RRETURN;
1511                     }
1512                 }
1513                 /* Control never reaches here */
1514                 
1515                 /* Match a single character type repeatedly; several different opcodes
1516                  share code. This is very similar to the code for single characters, but we
1517                  repeat it in the interests of efficiency. */
1518                 
1519                 BEGIN_OPCODE(TYPEEXACT):
1520                 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1521                 minimize = true;
1522                 stack.currentFrame->args.instructionPtr += 3;
1523                 goto REPEATTYPE;
1524                 
1525                 BEGIN_OPCODE(TYPEUPTO):
1526                 BEGIN_OPCODE(TYPEMINUPTO):
1527                 min = 0;
1528                 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1529                 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
1530                 stack.currentFrame->args.instructionPtr += 3;
1531                 goto REPEATTYPE;
1532                 
1533                 BEGIN_OPCODE(TYPESTAR):
1534                 BEGIN_OPCODE(TYPEMINSTAR):
1535                 BEGIN_OPCODE(TYPEPLUS):
1536                 BEGIN_OPCODE(TYPEMINPLUS):
1537                 BEGIN_OPCODE(TYPEQUERY):
1538                 BEGIN_OPCODE(TYPEMINQUERY):
1539                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
1540                 
1541                 /* Common code for all repeated single character type matches. Note that
1542                  in UTF-8 mode, '.' matches a character of any length, but for the other
1543                  character types, the valid characters are all one-byte long. */
1544                 
1545             REPEATTYPE:
1546                 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++;      /* Code for the character type */
1547                 
1548                 /* First, ensure the minimum number of matches are present. Use inline
1549                  code for maximizing the speed, and do the type test once at the start
1550                  (i.e. keep it out of the loop). Also we can test that there are at least
1551                  the minimum number of bytes before we start. This isn't as effective in
1552                  UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
1553                  is tidier. Also separate the UCP code, which can be the same for both UTF-8
1554                  and single-bytes. */
1555                 
1556                 if (min > md.end_subject - stack.currentFrame->args.subjectPtr)
1557                     RRETURN_NO_MATCH;
1558                 if (min > 0) {
1559                     switch(stack.currentFrame->locals.ctype) {
1560                         case OP_ANY_CHAR:
1561                             for (int i = 1; i <= min; i++) {
1562                                 if (isNewline(*stack.currentFrame->args.subjectPtr))
1563                                     RRETURN_NO_MATCH;
1564                                 if (!movePtrToNextChar(stack.currentFrame->args.subjectPtr, md.end_subject))
1565                                     RRETURN_NO_MATCH;
1566                             }
1567                             break;
1568                             
1569                             case OP_NOT_DIGIT:
1570                             for (int i = 1; i <= min; i++) {
1571                                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1572                                     RRETURN_NO_MATCH;
1573                                 if (!movePtrToNextChar(stack.currentFrame->args.subjectPtr, md.end_subject))
1574                                     RRETURN_NO_MATCH;
1575                             }
1576                             break;
1577                             
1578                             case OP_DIGIT:
1579                             for (int i = 1; i <= min; i++) {
1580                                 // FIXME: Why do we advance the subjectPtr here but not in OP_WHITESPACE or OP_WORDCHAR ?
1581                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || !isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
1582                                     RRETURN_NO_MATCH;
1583                                 /* No need to skip more bytes - we know it's a 1-byte character */
1584                             }
1585                             break;
1586                             
1587                             case OP_NOT_WHITESPACE:
1588                             for (int i = 1; i <= min; i++) {
1589                                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
1590                                     RRETURN_NO_MATCH;
1591                                 if (!movePtrToNextChar(stack.currentFrame->args.subjectPtr, md.end_subject))
1592                                     RRETURN_NO_MATCH;
1593                             }
1594                             break;
1595                             
1596                             case OP_WHITESPACE:
1597                             for (int i = 1; i <= min; i++) {
1598                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || !isSpaceChar(*stack.currentFrame->args.subjectPtr++))
1599                                     RRETURN_NO_MATCH;
1600                                 /* No need to skip more bytes - we know it's a 1-byte character */
1601                             }
1602                             break;
1603                             
1604                             case OP_NOT_WORDCHAR:
1605                             for (int i = 1; i <= min; i++) {
1606                                 if (isWordChar(*stack.currentFrame->args.subjectPtr))
1607                                     RRETURN_NO_MATCH;
1608                                 if (!movePtrToNextChar(stack.currentFrame->args.subjectPtr, md.end_subject))
1609                                     RRETURN_NO_MATCH;
1610                             }
1611                             break;
1612                             
1613                             case OP_WORDCHAR:
1614                             for (int i = 1; i <= min; i++) {
1615                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject || !isWordChar(*stack.currentFrame->args.subjectPtr++))
1616                                     RRETURN_NO_MATCH;
1617                                 /* No need to skip more bytes - we know it's a 1-byte character */
1618                             }
1619                             break;
1620                             
1621                             default:
1622                             ASSERT_NOT_REACHED();
1623                             return matchError(JSRegExpErrorInternal, stack);
1624                     }  /* End switch(stack.currentFrame->locals.ctype) */
1625                 }
1626                 
1627                 /* If min = max, continue at the same level without recursing */
1628                 
1629                 if (min == stack.currentFrame->locals.max)
1630                     NEXT_OPCODE;    
1631                 
1632                 /* If minimizing, we have to test the rest of the pattern before each
1633                  subsequent match. */
1634                 
1635                 if (minimize) {
1636                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1637                         RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1638                         if (is_match)
1639                             RRETURN;
1640                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1641                             RRETURN;
1642                         
1643                         int c = getCharAndAdvance(stack.currentFrame->args.subjectPtr);
1644                         switch(stack.currentFrame->locals.ctype) {
1645                         case OP_ANY_CHAR:
1646                             if (isNewline(c))
1647                                 RRETURN;
1648                             break;
1649                             
1650                         case OP_NOT_DIGIT:
1651                             if (isASCIIDigit(c))
1652                                 RRETURN;
1653                             break;
1654                             
1655                         case OP_DIGIT:
1656                             if (!isASCIIDigit(c))
1657                                 RRETURN;
1658                             break;
1659                             
1660                         case OP_NOT_WHITESPACE:
1661                             if (isSpaceChar(c))
1662                                 RRETURN;
1663                             break;
1664                             
1665                         case OP_WHITESPACE:
1666                             if  (!isSpaceChar(c))
1667                                 RRETURN;
1668                             break;
1669                             
1670                         case OP_NOT_WORDCHAR:
1671                             if (isWordChar(c))
1672                                 RRETURN;
1673                             break;
1674                             
1675                         case OP_WORDCHAR:
1676                             if (!isWordChar(c))
1677                                 RRETURN;
1678                             break;
1679                             
1680                         default:
1681                             ASSERT_NOT_REACHED();
1682                             return matchError(JSRegExpErrorInternal, stack);
1683                         }
1684                     }
1685                     /* Control never reaches here */
1686                 }
1687                 
1688                 /* If maximizing it is worth using inline code for speed, doing the type
1689                  test once at the start (i.e. keep it out of the loop). */
1690                 
1691                 else {
1692                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;  /* Remember where we started */
1693                     
1694                     switch(stack.currentFrame->locals.ctype) {
1695                         case OP_ANY_CHAR:
1696                             
1697                             /* Special code is required for UTF8, but when the maximum is unlimited
1698                              we don't need it, so we repeat the non-UTF8 code. This is probably
1699                              worth it, because .* is quite a common idiom. */
1700                             
1701                             if (stack.currentFrame->locals.max < INT_MAX) {
1702                                 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1703                                     if (stack.currentFrame->args.subjectPtr >= md.end_subject || isNewline(*stack.currentFrame->args.subjectPtr))
1704                                         break;
1705                                     stack.currentFrame->args.subjectPtr++;
1706                                     while (stack.currentFrame->args.subjectPtr < md.end_subject && (*stack.currentFrame->args.subjectPtr & 0xc0) == 0x80)
1707                                         stack.currentFrame->args.subjectPtr++;
1708                                 }
1709                             }
1710                             
1711                             /* Handle unlimited UTF-8 repeat */
1712                             
1713                             else {
1714                                 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1715                                     if (stack.currentFrame->args.subjectPtr >= md.end_subject || isNewline(*stack.currentFrame->args.subjectPtr))
1716                                         break;
1717                                     stack.currentFrame->args.subjectPtr++;
1718                                 }
1719                                 break;
1720                             }
1721                             break;
1722                             
1723                             case OP_NOT_DIGIT:
1724                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1725                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1726                                     break;
1727                                 int length;
1728                                 int c = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1729                                 if (isASCIIDigit(c))
1730                                     break;
1731                                 stack.currentFrame->args.subjectPtr += length;
1732                             }
1733                             break;
1734                             
1735                             case OP_DIGIT:
1736                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1737                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1738                                     break;
1739                                 int length;
1740                                 int c = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1741                                 if (!isASCIIDigit(c))
1742                                     break;
1743                                 stack.currentFrame->args.subjectPtr += length;
1744                             }
1745                             break;
1746                             
1747                             case OP_NOT_WHITESPACE:
1748                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1749                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1750                                     break;
1751                                 int length;
1752                                 int c = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1753                                 if (isSpaceChar(c))
1754                                     break;
1755                                 stack.currentFrame->args.subjectPtr += length;
1756                             }
1757                             break;
1758                             
1759                             case OP_WHITESPACE:
1760                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1761                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1762                                     break;
1763                                 int length;
1764                                 int c = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1765                                 if (!isSpaceChar(c))
1766                                     break;
1767                                 stack.currentFrame->args.subjectPtr += length;
1768                             }
1769                             break;
1770                             
1771                             case OP_NOT_WORDCHAR:
1772                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1773                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1774                                     break;
1775                                 int length;
1776                                 int c = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1777                                 if (isWordChar(c))
1778                                     break;
1779                                 stack.currentFrame->args.subjectPtr += length;
1780                             }
1781                             break;
1782                             
1783                             case OP_WORDCHAR:
1784                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1785                                 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1786                                     break;
1787                                 int length;
1788                                 int c = getCharAndLength(stack.currentFrame->args.subjectPtr, length);
1789                                 if (!isWordChar(c))
1790                                     break;
1791                                 stack.currentFrame->args.subjectPtr += length;
1792                             }
1793                             break;
1794                             
1795                             default:
1796                             ASSERT_NOT_REACHED();
1797                             return matchError(JSRegExpErrorInternal, stack);
1798                     }
1799                     
1800                     /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
1801                     
1802                     for (;;) {
1803                         RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1804                         if (is_match)
1805                             RRETURN;
1806                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1807                             break;        /* Stop if tried at original pos */
1808                         movePtrToStartOfCurrentChar(stack.currentFrame->args.subjectPtr);
1809                     }
1810                     
1811                     /* Get here if we can't make it match with any permitted repetitions */
1812                     
1813                     RRETURN;
1814                 }
1815                 /* Control never reaches here */
1816                 
1817                 BEGIN_OPCODE(CRMINPLUS):
1818                 BEGIN_OPCODE(CRMINQUERY):
1819                 BEGIN_OPCODE(CRMINRANGE):
1820                 BEGIN_OPCODE(CRMINSTAR):
1821                 BEGIN_OPCODE(CRPLUS):
1822                 BEGIN_OPCODE(CRQUERY):
1823                 BEGIN_OPCODE(CRRANGE):
1824                 BEGIN_OPCODE(CRSTAR):
1825                 ASSERT_NOT_REACHED();
1826                 return matchError(JSRegExpErrorInternal, stack);
1827                 
1828 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1829             CAPTURING_BRACKET:
1830 #else
1831                 default:
1832 #endif
1833                 /* Opening capturing bracket. If there is space in the offset vector, save
1834                  the current subject position in the working slot at the top of the vector. We
1835                  mustn't change the current values of the data slot, because they may be set
1836                  from a previous iteration of this group, and be referred to by a reference
1837                  inside the group.
1838                  
1839                  If the bracket fails to match, we need to restore this value and also the
1840                  values of the final offsets, in case they were set by a previous iteration of
1841                  the same bracket.
1842                  
1843                  If there isn't enough space in the offset vector, treat this as if it were a
1844                  non-capturing bracket. Don't worry about setting the flag for the error case
1845                  here; that is handled in the code for KET. */
1846                 
1847                 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1848                 
1849                 stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
1850                 
1851                 /* For extended extraction brackets (large number), we have to fish out the
1852                  number from a dummy opcode at the start. */
1853                 
1854                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1855                     stack.currentFrame->locals.number = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 2+LINK_SIZE);
1856                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
1857                 
1858 #ifdef DEBUG
1859                 printf("start bracket %d subject=", stack.currentFrame->locals.number);
1860                 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
1861                 printf("\n");
1862 #endif
1863                 
1864                 if (stack.currentFrame->locals.offset < md.offset_max) {
1865                     stack.currentFrame->locals.save_offset1 = md.offset_vector[stack.currentFrame->locals.offset];
1866                     stack.currentFrame->locals.save_offset2 = md.offset_vector[stack.currentFrame->locals.offset + 1];
1867                     stack.currentFrame->locals.save_offset3 = md.offset_vector[md.offset_end - stack.currentFrame->locals.number];
1868                     
1869                     DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.save_offset1, stack.currentFrame->locals.save_offset2, stack.currentFrame->locals.save_offset3));
1870                     md.offset_vector[md.offset_end - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.start_subject;
1871                     
1872                     do {
1873                         RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
1874                         if (is_match)
1875                             RRETURN;
1876                         stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1877                     } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
1878                     
1879                     DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
1880                     
1881                     md.offset_vector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.save_offset1;
1882                     md.offset_vector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.save_offset2;
1883                     md.offset_vector[md.offset_end - stack.currentFrame->locals.number] = stack.currentFrame->locals.save_offset3;
1884                     
1885                     RRETURN;
1886                 }
1887                 
1888                 /* Insufficient room for saving captured contents */
1889                 
1890                 goto NON_CAPTURING_BRACKET;
1891         }
1892         
1893         /* Do not stick any code in here without much thought; it is assumed
1894          that "continue" in the code above comes out to here to repeat the main
1895          loop. */
1896         
1897     } /* End of main loop */
1898     
1899     ASSERT_NOT_REACHED();
1900     
1901 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1902     
1903 RRETURN_SWITCH:
1904     switch (stack.currentFrame->returnLocation)
1905     {
1906         case 0: goto RETURN;
1907         case 1: goto RRETURN_1;
1908         case 2: goto RRETURN_2;
1909         case 6: goto RRETURN_6;
1910         case 7: goto RRETURN_7;
1911         case 9: goto RRETURN_9;
1912         case 10: goto RRETURN_10;
1913         case 11: goto RRETURN_11;
1914         case 12: goto RRETURN_12;
1915         case 13: goto RRETURN_13;
1916         case 14: goto RRETURN_14;
1917         case 15: goto RRETURN_15;
1918         case 16: goto RRETURN_16;
1919         case 17: goto RRETURN_17;
1920         case 18: goto RRETURN_18;
1921         case 19: goto RRETURN_19;
1922         case 20: goto RRETURN_20;
1923         case 21: goto RRETURN_21;
1924         case 22: goto RRETURN_22;
1925         case 24: goto RRETURN_24;
1926         case 26: goto RRETURN_26;
1927         case 27: goto RRETURN_27;
1928         case 28: goto RRETURN_28;
1929         case 29: goto RRETURN_29;
1930         case 30: goto RRETURN_30;
1931         case 31: goto RRETURN_31;
1932         case 38: goto RRETURN_38;
1933         case 40: goto RRETURN_40;
1934         case 42: goto RRETURN_42;
1935         case 44: goto RRETURN_44;
1936         case 48: goto RRETURN_48;
1937         case 52: goto RRETURN_52;
1938     }
1939     
1940     ASSERT_NOT_REACHED();
1941     return matchError(JSRegExpErrorInternal, stack);
1942     
1943 #endif
1944     
1945 RETURN:
1946     ASSERT(is_match == MATCH_MATCH || is_match == MATCH_NOMATCH);
1947     return is_match;
1948 }
1949
1950
1951 /*************************************************
1952 *         Execute a Regular Expression           *
1953 *************************************************/
1954
1955 /* This function applies a compiled re to a subject string and picks out
1956 portions of the string if it matches. Two elements in the vector are set for
1957 each substring: the offsets to the start and end of the substring.
1958
1959 Arguments:
1960   re              points to the compiled expression
1961   extra_data      points to extra data or is NULL
1962   subject         points to the subject string
1963   length          length of subject string (may contain binary zeros)
1964   start_offset    where to start in the subject string
1965   options         option bits
1966   offsets         points to a vector of ints to be filled in with offsets
1967   offsetcount     the number of elements in the vector
1968
1969 Returns:          > 0 => success; value is the number of elements filled in
1970                   = 0 => success, but offsets is not big enough
1971                    -1 => failed to match
1972                  < -1 => some kind of unexpected problem
1973 */
1974
1975 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
1976 {
1977     // If first_byte is set, try scanning to the first instance of that byte
1978     // no need to try and match against any earlier part of the subject string.
1979     if (first_byte >= 0) {
1980         UChar first_char = first_byte;
1981         if (first_byte_caseless)
1982             while (subjectPtr < endSubject) {
1983                 int c = *subjectPtr;
1984                 if (c > 127)
1985                     break;
1986                 if (toLowerCase(c) == first_char)
1987                     break;
1988                 subjectPtr++;
1989             }
1990         else {
1991             while (subjectPtr < endSubject && *subjectPtr != first_char)
1992                 subjectPtr++;
1993         }
1994     } else if (useMultiLineFirstCharOptimization) {
1995         /* Or to just after \n for a multiline match if possible */
1996         // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
1997         if (subjectPtr > originalSubjectStart) {
1998             while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
1999                 subjectPtr++;
2000         }
2001     }
2002 }
2003
2004 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByte, const UChar*& req_byte_ptr)
2005 {
2006     /* If req_byte is set, we know that that character must appear in the subject
2007      for the match to succeed. If the first character is set, req_byte must be
2008      later in the subject; otherwise the test starts at the match point. This
2009      optimization can save a huge amount of backtracking in patterns with nested
2010      unlimited repeats that aren't going to match. Writing separate code for
2011      cased/caseless versions makes it go faster, as does using an autoincrement
2012      and backing off on a match.
2013      
2014      HOWEVER: when the subject string is very, very long, searching to its end can
2015      take a long time, and give bad performance on quite ordinary patterns. This
2016      showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2017      don't do this when the string is sufficiently long.
2018     */
2019
2020     if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
2021         const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
2022
2023         /* We don't need to repeat the search if we haven't yet reached the
2024          place we found it at last time. */
2025
2026         if (p > req_byte_ptr) {
2027             if (req_byte_caseless) {
2028                 while (p < endSubject) {
2029                     int pp = *p++;
2030                     if (pp == req_byte || pp == req_byte2) {
2031                         p--;
2032                         break;
2033                     }
2034                 }
2035             } else {
2036                 while (p < endSubject) {
2037                     if (*p++ == req_byte) {
2038                         p--;
2039                         break;
2040                     }
2041                 }
2042             }
2043
2044             /* If we can't find the required character, break the matching loop */
2045
2046             if (p >= endSubject)
2047                 return true;
2048
2049             /* If we have found the required character, save the point where we
2050              found it, so that we don't search again next time round the loop if
2051              the start hasn't passed this character yet. */
2052
2053             req_byte_ptr = p;
2054         }
2055     }
2056     return false;
2057 }
2058
2059 int jsRegExpExecute(const JSRegExp* re,
2060                     const UChar* subject, int length, int start_offset, int* offsets,
2061                     int offsetcount)
2062 {
2063     ASSERT(re);
2064     ASSERT(subject);
2065     ASSERT(offsetcount >= 0);
2066     ASSERT(offsets || offsetcount == 0);
2067     
2068     MatchData match_block;
2069     match_block.start_subject = subject;
2070     match_block.end_subject = match_block.start_subject + length;
2071     const UChar* end_subject = match_block.end_subject;
2072     
2073     match_block.multiline = (re->options & MatchAcrossMultipleLinesOption);
2074     match_block.ignoreCase = (re->options & IgnoreCaseOption);
2075     
2076     /* If the expression has got more back references than the offsets supplied can
2077      hold, we get a temporary chunk of working store to use during the matching.
2078      Otherwise, we can use the vector supplied, rounding down its size to a multiple
2079      of 3. */
2080     
2081     int ocount = offsetcount - (offsetcount % 3);
2082     
2083     // FIXME: This is lame that we have to second-guess our caller here.
2084     // The API should change to either fail-hard when we don't have enough offset space
2085     // or that we shouldn't ask our callers to pre-allocate in the first place.
2086     bool using_temporary_offsets = false;
2087     if (re->top_backref > 0 && re->top_backref >= ocount/3) {
2088         ocount = re->top_backref * 3 + 3;
2089         match_block.offset_vector = new int[ocount];
2090         if (!match_block.offset_vector)
2091             return JSRegExpErrorNoMemory;
2092         using_temporary_offsets = true;
2093         ASSERT_NOT_REACHED(); // Fail debug builds -- No one should be hitting this vestigal (slow!) code, see comment above.
2094     } else
2095         match_block.offset_vector = offsets;
2096     
2097     match_block.offset_end = ocount;
2098     match_block.offset_max = (2*ocount)/3;
2099     match_block.offset_overflow = false;
2100     
2101     /* Compute the minimum number of offsets that we need to reset each time. Doing
2102      this makes a huge difference to execution time when there aren't many brackets
2103      in the pattern. */
2104     
2105     int resetcount = 2 + re->top_bracket * 2;
2106     if (resetcount > offsetcount)
2107         resetcount = ocount;
2108     
2109     /* Reset the working variable associated with each extraction. These should
2110      never be used unless previously set, but they get saved and restored, and so we
2111      initialize them to avoid reading uninitialized locations. */
2112     
2113     if (match_block.offset_vector) {
2114         int* iptr = match_block.offset_vector + ocount;
2115         int* iend = iptr - resetcount/2 + 1;
2116         while (--iptr >= iend)
2117             *iptr = -1;
2118     }
2119     
2120     /* Set up the first character to match, if available. The first_byte value is
2121      never set for an anchored regular expression, but the anchoring may be forced
2122      at run time, so we have to test for anchoring. The first char may be unset for
2123      an unanchored pattern, of course. If there's no first char and the pattern was
2124      studied, there may be a bitmap of possible first characters. */
2125     
2126     bool first_byte_caseless = false;
2127     int first_byte = -1;
2128     if (re->options & UseFirstByteOptimizationOption) {
2129         first_byte = re->first_byte & 255;
2130         if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
2131             first_byte = toLowerCase(first_byte);
2132     }
2133     
2134     /* For anchored or unanchored matches, there may be a "last known required
2135      character" set. */
2136     
2137     bool req_byte_caseless = false;
2138     int req_byte = -1;
2139     int req_byte2 = -1;
2140     if (re->options & UseRequiredByteOptimizationOption) {
2141         req_byte = re->req_byte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
2142         req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
2143         req_byte2 = flipCase(req_byte);
2144     }
2145     
2146     /* Loop for handling unanchored repeated matching attempts; for anchored regexs
2147      the loop runs just once. */
2148     
2149     const UChar* start_match = subject + start_offset;
2150     const UChar* req_byte_ptr = start_match - 1;
2151     bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
2152     
2153     do {
2154         /* Reset the maximum number of extractions we might see. */
2155         if (match_block.offset_vector) {
2156             int* iptr = match_block.offset_vector;
2157             int* iend = iptr + resetcount;
2158             while (iptr < iend)
2159                 *iptr++ = -1;
2160         }
2161         
2162         tryFirstByteOptimization(start_match, end_subject, first_byte, first_byte_caseless, useMultiLineFirstCharOptimization, match_block.start_subject + start_offset);
2163         if (tryRequiredByteOptimization(start_match, end_subject, req_byte, req_byte2, req_byte_caseless, first_byte >= 0, req_byte_ptr))
2164             break;
2165                 
2166         /* When a match occurs, substrings will be set for all internal extractions;
2167          we just need to set up the whole thing as substring 0 before returning. If
2168          there were too many extractions, set the return code to zero. In the case
2169          where we had to get some local store to hold offsets for backreferences, copy
2170          those back references that we can. In this case there need not be overflow
2171          if certain parts of the pattern were not used. */
2172         
2173         /* The code starts after the JSRegExp block and the capture name table. */
2174         const uschar* start_code = (const uschar*)(re + 1);
2175         
2176         int returnCode = match(start_match, start_code, 2, match_block);
2177         
2178         /* When the result is no match, if the subject's first character was a
2179          newline and the PCRE_FIRSTLINE option is set, break (which will return
2180          PCRE_ERROR_NOMATCH). The option requests that a match occur before the first
2181          newline in the subject. Otherwise, advance the pointer to the next character
2182          and continue - but the continuation will actually happen only when the
2183          pattern is not anchored. */
2184         
2185         if (returnCode == MATCH_NOMATCH) {
2186             start_match++;
2187             if (start_match < end_subject && isTrailingSurrogate(*start_match))
2188                 start_match++;
2189             continue;
2190         }
2191         
2192         if (returnCode != MATCH_MATCH) {
2193             DPRINTF((">>>> error: returning %d\n", rc));
2194             return returnCode;
2195         }
2196         
2197         /* We have a match! Copy the offset information from temporary store if
2198          necessary */
2199         
2200         if (using_temporary_offsets) {
2201             if (offsetcount >= 4) {
2202                 memcpy(offsets + 2, match_block.offset_vector + 2, (offsetcount - 2) * sizeof(int));
2203                 DPRINTF(("Copied offsets from temporary memory\n"));
2204             }
2205             if (match_block.end_offset_top > offsetcount)
2206                 match_block.offset_overflow = true;
2207             
2208             DPRINTF(("Freeing temporary memory\n"));
2209             delete [] match_block.offset_vector;
2210         }
2211         
2212         returnCode = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
2213         
2214         if (offsetcount < 2)
2215             returnCode = 0;
2216         else {
2217             offsets[0] = start_match - match_block.start_subject;
2218             offsets[1] = match_block.end_match_ptr - match_block.start_subject;
2219         }
2220         
2221         DPRINTF((">>>> returning %d\n", rc));
2222         return returnCode;
2223     } while (start_match <= end_subject);
2224     
2225     if (using_temporary_offsets) {
2226         DPRINTF(("Freeing temporary memory\n"));
2227         delete [] match_block.offset_vector;
2228     }
2229     
2230     DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2231     return JSRegExpErrorNoMatch;
2232 }