2007-11-15 Eric Seidel <eric@webkit.org>
[WebKit-https.git] / JavaScriptCore / pcre / pcre_exec.cpp
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7                  Originally written by Philip Hazel
8            Copyright (c) 1997-2006 University of Cambridge
9     Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 /* This module contains jsRegExpExecute(), the externally visible function
41 that does pattern matching using an NFA algorithm, following the rules from
42 the JavaScript specification. There are also some supporting functions. */
43
44 #include "config.h"
45
46 #include "pcre_internal.h"
47
48 #include <wtf/ASCIICType.h>
49 #include <wtf/Vector.h>
50
51 using namespace WTF;
52
53 #ifdef __GNUC__
54 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
55 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
56 #endif
57
58 /* Avoid warnings on Windows. */
59 #undef min
60 #undef max
61
62 /* Structure for building a chain of data that actually lives on the
63 stack, for holding the values of the subject pointer at the start of each
64 subpattern, so as to detect when an empty string has been matched by a
65 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks
66 are on the heap, not on the stack. */
67
68 typedef struct eptrblock {
69   struct eptrblock *epb_prev;
70   USPTR epb_saved_eptr;
71 } eptrblock;
72
73 /* Structure for remembering the local variables in a private frame */
74
75 typedef struct matchframe {
76   /* Where to jump back to */
77 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
78   int where;
79 #else
80   void *where;
81 #endif
82
83   struct matchframe *prevframe;
84
85   /* Function arguments that may change */
86
87   const pcre_uchar *eptr;
88   const uschar *ecode;
89   int offset_top;
90   eptrblock *eptrb;
91
92   /* Function local variables */
93
94   const uschar *data;
95   const uschar *next;
96   const pcre_uchar *pp;
97   const uschar *prev;
98   const pcre_uchar *saved_eptr;
99
100   int repeat_othercase;
101
102   int ctype;
103   int fc;
104   int fi;
105   int length;
106   int max;
107   int number;
108   int offset;
109   int save_offset1, save_offset2, save_offset3;
110
111   eptrblock newptrb;
112 } matchframe;
113
114 /* Structure for passing "static" information around between the functions
115 doing traditional NFA matching, so that they are thread-safe. */
116
117 typedef struct match_data {
118   unsigned long int match_call_count;      /* As it says */
119   int   *offset_vector;         /* Offset vector */
120   int    offset_end;            /* One past the end */
121   int    offset_max;            /* The maximum usable for return data */
122   const uschar *lcc;            /* Points to lower casing table */
123   const uschar *ctypes;         /* Points to table of type maps */
124   BOOL   offset_overflow;       /* Set if too many extractions */
125   USPTR  start_subject;         /* Start of the subject string */
126   USPTR  end_subject;           /* End of the subject string */
127   USPTR  end_match_ptr;         /* Subject position at end match */
128   int    end_offset_top;        /* Highwater mark at end of match */
129   BOOL   multiline;
130   BOOL   caseless;
131 } match_data;
132
133 #define match_isgroup      true    /* Set if start of bracketed group */
134
135 /* Non-error returns from the match() function. Error returns are externally
136 defined PCRE_ERROR_xxx codes, which are all negative. */
137
138 #define MATCH_MATCH        1
139 #define MATCH_NOMATCH      0
140
141 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
142
143 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
144 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
145
146
147
148 #ifdef DEBUG
149 /*************************************************
150 *        Debugging function to print chars       *
151 *************************************************/
152
153 /* Print a sequence of chars in printable format, stopping at the end of the
154 subject if the requested.
155
156 Arguments:
157   p           points to characters
158   length      number to print
159   is_subject  true if printing from within md->start_subject
160   md          pointer to matching data block, if is_subject is true
161
162 Returns:     nothing
163 */
164
165 static void
166 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
167 {
168 int c;
169 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
170 while (length-- > 0)
171   if (isprint(c = *(p++))) printf("%c", c);
172   else if (c < 256) printf("\\x%02x", c);
173   else printf("\\x{%x}", c);
174 }
175 #endif
176
177
178
179 /*************************************************
180 *          Match a back-reference                *
181 *************************************************/
182
183 /* If a back reference hasn't been set, the length that is passed is greater
184 than the number of characters left in the string, so the match fails.
185
186 Arguments:
187   offset      index into the offset vector
188   eptr        points into the subject
189   length      length to be matched
190   md          points to match data block
191
192 Returns:      true if matched
193 */
194
195 static BOOL
196 match_ref(int offset, register USPTR eptr, int length, match_data *md)
197 {
198 USPTR p = md->start_subject + md->offset_vector[offset];
199
200 #ifdef DEBUG
201 if (eptr >= md->end_subject)
202   printf("matching subject <null>");
203 else
204   {
205   printf("matching subject ");
206   pchars(eptr, length, true, md);
207   }
208 printf(" against backref ");
209 pchars(p, length, false, md);
210 printf("\n");
211 #endif
212
213 /* Always fail if not enough characters left */
214
215 if (length > md->end_subject - eptr) return false;
216
217 /* Separate the caselesss case for speed */
218
219 if (md->caseless)
220   {
221   while (length-- > 0)
222     {
223     pcre_uchar c = *p++;
224     int othercase = _pcre_ucp_othercase(c);
225     pcre_uchar d = *eptr++;
226     if (c != d && othercase != d) return false;
227     }
228   }
229 else
230   { while (length-- > 0) if (*p++ != *eptr++) return false; }
231
232 return true;
233 }
234
235
236
237 /***************************************************************************
238 ****************************************************************************
239                    RECURSION IN THE match() FUNCTION
240
241 The original match() function was highly recursive. The current version
242 still has the remnants of the original in that recursive processing of the
243 regular expression is triggered by invoking a macro named RMATCH. This is
244 no longer really much like a recursive call to match() itself.
245 ****************************************************************************
246 ***************************************************************************/
247
248 /* These versions of the macros use the stack, as normal. There are debugging
249 versions and production versions. */
250
251 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
252
253 /* Use numbered labels and switch statement at the bottom of the match function. */
254
255 #define RMATCH_WHERE(num) num
256 #define RRETURN_LABEL RRETURN_SWITCH
257
258 #else
259
260 /* Use GCC's computed goto extension. */
261
262 /* For one test case this is more than 40% faster than the switch statement.
263 We could avoid the use of the num argument entirely by using local labels,
264 but using it for the GCC case as well as the non-GCC case allows us to share
265 a bit more code and notice if we use conflicting numbers.*/
266
267 #define RMATCH_WHERE(num) &&RRETURN_##num
268 #define RRETURN_LABEL *stack.currentFrame->where
269
270 #endif
271
272 #define RMATCH(num, ra, rb, rc)\
273   {\
274   if (stack.currentFrame >= stack.frames && stack.currentFrame + 1 < stack.framesEnd)\
275     newframe = stack.currentFrame + 1;\
276   else\
277     newframe = new matchframe;\
278   newframe->eptr = stack.currentFrame->eptr;\
279   newframe->ecode = (ra);\
280   newframe->offset_top = stack.currentFrame->offset_top;\
281   newframe->eptrb = (rb);\
282   is_group_start = (rc);\
283   ++rdepth;\
284   newframe->prevframe = stack.currentFrame;\
285   stack.currentFrame = newframe;\
286   stack.currentFrame->where = RMATCH_WHERE(num);\
287   DPRINTF(("restarting from line %d\n", __LINE__));\
288   goto RECURSE;\
289 RRETURN_##num:\
290   newframe = stack.currentFrame;\
291   stack.currentFrame = stack.currentFrame->prevframe;\
292   if (!(newframe >= stack.frames && newframe < stack.framesEnd))\
293     delete newframe;\
294   --rdepth;\
295   DPRINTF(("did a goto back to line %d\n", __LINE__));\
296   }
297  
298 #define RRETURN goto RRETURN_LABEL
299
300 #define RRETURN_NO_MATCH \
301   {\
302     is_match = false;\
303     RRETURN;\
304   }
305
306 #define RRETURN_ERROR(error) \
307     return matchError(error, stack);
308
309 /*************************************************
310 *         Match from current position            *
311 *************************************************/
312
313 /* On entry ecode points to the first opcode, and eptr to the first character
314 in the subject string, while eptrb holds the value of eptr at the start of the
315 last bracketed group - used for breaking infinite loops matching zero-length
316 strings. This function is called recursively in many circumstances. Whenever it
317 returns a negative (error) response, the outer incarnation must also return the
318 same response.
319
320 Arguments:
321    eptr        pointer in subject
322    ecode       position in code
323    offset_top  current top pointer
324    md          pointer to "static" info for the match
325
326 Returns:       MATCH_MATCH if matched            )  these values are >= 0
327                MATCH_NOMATCH if failed to match  )
328                a negative PCRE_ERROR_xxx value if aborted by an error condition
329                  (e.g. stopped by repeated call or recursion limit)
330 */
331
332 struct MatchStack {
333     MatchStack()
334     {
335         framesEnd = frames + sizeof(frames) / sizeof(frames[0]);
336         currentFrame = frames;
337     }
338     matchframe frames[16];
339     matchframe* framesEnd;
340     matchframe* currentFrame;
341
342     void unrollAnyHeapAllocatedFrames()
343     {
344         while (!(currentFrame >= frames && currentFrame < framesEnd)) {
345             matchframe* parentFrame = currentFrame->prevframe;
346             delete currentFrame;
347             currentFrame = parentFrame;
348         }
349     }
350 };
351
352 static int matchError(int errorCode, MatchStack& stack)
353 {
354     stack.unrollAnyHeapAllocatedFrames();
355     return errorCode;
356 }
357
358 static int match(USPTR eptr, const uschar* ecode, int offset_top, match_data* md)
359 {
360     register int is_match = false;
361     register int i;
362     register int c;
363     
364     unsigned rdepth = 0;
365     
366     BOOL cur_is_word;
367     BOOL prev_is_word;
368     BOOL is_group_start = true;
369     int min;
370     BOOL minimize = false; /* Initialization not really needed, but some compilers think so. */
371     
372     /* The value 16 here is large enough that most regular expressions don't require
373      any calls to pcre_stack_malloc, yet the amount of stack used for the array is
374      modest enough that we don't run out of stack. */
375     MatchStack stack;
376     matchframe* newframe;
377     
378     /* The opcode jump table. */
379 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
380 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
381     static void* opcode_jump_table[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
382 #undef EMIT_JUMP_TABLE_ENTRY
383 #endif
384     
385     /* One-time setup of the opcode jump table. */
386 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
387     i = 255;
388     while (!opcode_jump_table[i])
389         opcode_jump_table[i--] = &&CAPTURING_BRACKET;
390 #endif
391     
392 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
393     stack.currentFrame->where = &&RETURN;
394 #else
395     stack.currentFrame->where = 0;
396 #endif
397     
398     stack.currentFrame->eptr = eptr;
399     stack.currentFrame->ecode = ecode;
400     stack.currentFrame->offset_top = offset_top;
401     stack.currentFrame->eptrb = NULL;
402     
403     /* This is where control jumps back to to effect "recursion" */
404     
405 RECURSE:
406     
407     /* OK, now we can get on with the real code of the function. Recursive calls
408      are specified by the macro RMATCH and RRETURN is used to return. When
409      NO_RECURSE is *not* defined, these just turn into a recursive call to match()
410      and a "return", respectively (possibly with some debugging if DEBUG is
411      defined). However, RMATCH isn't like a function call because it's quite a
412      complicated macro. It has to be used in one particular way. This shouldn't,
413      however, impact performance when true recursion is being used. */
414     
415     /* First check that we haven't called match() too many times, or that we
416      haven't exceeded the recursive call limit. */
417     
418     if (md->match_call_count++ >= MATCH_LIMIT)
419         RRETURN_ERROR(JSRegExpErrorMatchLimit);
420     if (rdepth >= MATCH_LIMIT_RECURSION)
421         RRETURN_ERROR(JSRegExpErrorRecursionLimit);
422     
423     /* At the start of a bracketed group, add the current subject pointer to the
424      stack of such pointers, to be re-instated at the end of the group when we hit
425      the closing ket. When match() is called in other circumstances, we don't add to
426      this stack. */
427     
428     if (is_group_start) {
429         stack.currentFrame->newptrb.epb_prev = stack.currentFrame->eptrb;
430         stack.currentFrame->newptrb.epb_saved_eptr = stack.currentFrame->eptr;
431         stack.currentFrame->eptrb = &stack.currentFrame->newptrb;
432     }
433     
434     /* Now start processing the operations. */
435     
436 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
437     while (true)
438 #endif
439     {
440         
441 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
442 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
443 #define NEXT_OPCODE goto *opcode_jump_table[*stack.currentFrame->ecode]
444 #else
445 #define BEGIN_OPCODE(opcode) case OP_##opcode
446 #define NEXT_OPCODE continue
447 #endif
448         
449 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
450         NEXT_OPCODE;
451 #else
452         switch (*stack.currentFrame->ecode)
453 #endif
454         {
455                 /* Non-capturing bracket: optimized */
456                 
457                 BEGIN_OPCODE(BRA):
458             NON_CAPTURING_BRACKET:
459                 DPRINTF(("start bracket 0\n"));
460                 do {
461                     RMATCH(2, stack.currentFrame->ecode + 1 + LINK_SIZE, stack.currentFrame->eptrb, match_isgroup);
462                     if (is_match)
463                         RRETURN;
464                     stack.currentFrame->ecode += GET(stack.currentFrame->ecode, 1);
465                 } while (*stack.currentFrame->ecode == OP_ALT);
466                 DPRINTF(("bracket 0 failed\n"));
467                 RRETURN;
468                 
469                 /* Skip over large extraction number data if encountered. */
470                 
471                 BEGIN_OPCODE(BRANUMBER):
472                 stack.currentFrame->ecode += 3;
473                 NEXT_OPCODE;
474                 
475                 /* End of the pattern. */
476                 
477                 BEGIN_OPCODE(END):
478                 md->end_match_ptr = stack.currentFrame->eptr;          /* Record where we ended */
479                 md->end_offset_top = stack.currentFrame->offset_top;   /* and how many extracts were taken */
480                 is_match = true;
481                 RRETURN;
482                 
483                 /* Assertion brackets. Check the alternative branches in turn - the
484                  matching won't pass the KET for an assertion. If any one branch matches,
485                  the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
486                  start of each branch to move the current point backwards, so the code at
487                  this level is identical to the lookahead case. */
488                 
489                 BEGIN_OPCODE(ASSERT):
490                 do {
491                     RMATCH(6, stack.currentFrame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
492                     if (is_match) break;
493                     stack.currentFrame->ecode += GET(stack.currentFrame->ecode, 1);
494                 } while (*stack.currentFrame->ecode == OP_ALT);
495                 if (*stack.currentFrame->ecode == OP_KET)
496                     RRETURN_NO_MATCH;
497                 
498                 /* Continue from after the assertion, updating the offsets high water
499                  mark, since extracts may have been taken during the assertion. */
500                 
501                 do stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1); while (*stack.currentFrame->ecode == OP_ALT);
502                 stack.currentFrame->ecode += 1 + LINK_SIZE;
503                 stack.currentFrame->offset_top = md->end_offset_top;
504                 NEXT_OPCODE;
505                 
506                 /* Negative assertion: all branches must fail to match */
507                 
508                 BEGIN_OPCODE(ASSERT_NOT):
509                 do {
510                     RMATCH(7, stack.currentFrame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
511                     if (is_match)
512                         RRETURN_NO_MATCH;
513                     stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1);
514                 } while (*stack.currentFrame->ecode == OP_ALT);
515                 
516                 stack.currentFrame->ecode += 1 + LINK_SIZE;
517                 NEXT_OPCODE;
518                 
519                 /* "Once" brackets are like assertion brackets except that after a match,
520                  the point in the subject string is not moved back. Thus there can never be
521                  a move back into the brackets. Friedl calls these "atomic" subpatterns.
522                  Check the alternative branches in turn - the matching won't pass the KET
523                  for this kind of subpattern. If any one branch matches, we carry on as at
524                  the end of a normal bracket, leaving the subject pointer. */
525                 
526                 BEGIN_OPCODE(ONCE):
527                 stack.currentFrame->prev = stack.currentFrame->ecode;
528                 stack.currentFrame->saved_eptr = stack.currentFrame->eptr;
529                 
530                 do {
531                     RMATCH(9, stack.currentFrame->ecode + 1 + LINK_SIZE, stack.currentFrame->eptrb, match_isgroup);
532                     if (is_match)
533                         break;
534                     stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1);
535                 } while (*stack.currentFrame->ecode == OP_ALT);
536                 
537                 /* If hit the end of the group (which could be repeated), fail */
538                 
539                 if (*stack.currentFrame->ecode != OP_ONCE && *stack.currentFrame->ecode != OP_ALT)
540                     RRETURN;
541                 
542                 /* Continue as from after the assertion, updating the offsets high water
543                  mark, since extracts may have been taken. */
544                 
545                 do stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1); while (*stack.currentFrame->ecode == OP_ALT);
546                 
547                 stack.currentFrame->offset_top = md->end_offset_top;
548                 stack.currentFrame->eptr = md->end_match_ptr;
549                 
550                 /* For a non-repeating ket, just continue at this level. This also
551                  happens for a repeating ket if no characters were matched in the group.
552                  This is the forcible breaking of infinite loops as implemented in Perl
553                  5.005. If there is an options reset, it will get obeyed in the normal
554                  course of events. */
555                 
556                 if (*stack.currentFrame->ecode == OP_KET || stack.currentFrame->eptr == stack.currentFrame->saved_eptr) {
557                     stack.currentFrame->ecode += 1+LINK_SIZE;
558                     NEXT_OPCODE;
559                 }
560                 
561                 /* The repeating kets try the rest of the pattern or restart from the
562                  preceding bracket, in the appropriate order. We need to reset any options
563                  that changed within the bracket before re-running it, so check the next
564                  opcode. */
565                 
566                 if (*stack.currentFrame->ecode == OP_KETRMIN) {
567                     RMATCH(10, stack.currentFrame->ecode + 1 + LINK_SIZE, stack.currentFrame->eptrb, 0);
568                     if (is_match)
569                         RRETURN;
570                     RMATCH(11, stack.currentFrame->prev, stack.currentFrame->eptrb, match_isgroup);
571                     if (is_match)
572                         RRETURN;
573                 } else { /* OP_KETRMAX */
574                     RMATCH(12, stack.currentFrame->prev, stack.currentFrame->eptrb, match_isgroup);
575                     if (is_match)
576                         RRETURN;
577                     RMATCH(13, stack.currentFrame->ecode + 1+LINK_SIZE, stack.currentFrame->eptrb, 0);
578                     if (is_match)
579                         RRETURN;
580                 }
581                 RRETURN;
582                 
583                 /* An alternation is the end of a branch; scan along to find the end of the
584                  bracketed group and go to there. */
585                 
586                 BEGIN_OPCODE(ALT):
587                 do stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1); while (*stack.currentFrame->ecode == OP_ALT);
588                 NEXT_OPCODE;
589                 
590                 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
591                  that it may occur zero times. It may repeat infinitely, or not at all -
592                  i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
593                  repeat limits are compiled as a number of copies, with the optional ones
594                  preceded by BRAZERO or BRAMINZERO. */
595                 
596                 BEGIN_OPCODE(BRAZERO):
597                 {
598                     stack.currentFrame->next = stack.currentFrame->ecode+1;
599                     RMATCH(14, stack.currentFrame->next, stack.currentFrame->eptrb, match_isgroup);
600                     if (is_match)
601                         RRETURN;
602                     do stack.currentFrame->next += GET(stack.currentFrame->next,1); while (*stack.currentFrame->next == OP_ALT);
603                     stack.currentFrame->ecode = stack.currentFrame->next + 1+LINK_SIZE;
604                 }
605                 NEXT_OPCODE;
606                 
607                 BEGIN_OPCODE(BRAMINZERO):
608                 {
609                     stack.currentFrame->next = stack.currentFrame->ecode+1;
610                     do stack.currentFrame->next += GET(stack.currentFrame->next,1); while (*stack.currentFrame->next == OP_ALT);
611                     RMATCH(15, stack.currentFrame->next + 1+LINK_SIZE, stack.currentFrame->eptrb, match_isgroup);
612                     if (is_match)
613                         RRETURN;
614                     stack.currentFrame->ecode++;
615                 }
616                 NEXT_OPCODE;
617                 
618                 /* End of a group, repeated or non-repeating. If we are at the end of
619                  an assertion "group", stop matching and return MATCH_MATCH, but record the
620                  current high water mark for use by positive assertions. Do this also
621                  for the "once" (not-backup up) groups. */
622                 
623                 BEGIN_OPCODE(KET):
624                 BEGIN_OPCODE(KETRMIN):
625                 BEGIN_OPCODE(KETRMAX):
626                 stack.currentFrame->prev = stack.currentFrame->ecode - GET(stack.currentFrame->ecode, 1);
627                 stack.currentFrame->saved_eptr = stack.currentFrame->eptrb->epb_saved_eptr;
628                 
629                 /* Back up the stack of bracket start pointers. */
630                 
631                 stack.currentFrame->eptrb = stack.currentFrame->eptrb->epb_prev;
632                 
633                 if (*stack.currentFrame->prev == OP_ASSERT || *stack.currentFrame->prev == OP_ASSERT_NOT || *stack.currentFrame->prev == OP_ONCE) {
634                     md->end_match_ptr = stack.currentFrame->eptr;      /* For ONCE */
635                     md->end_offset_top = stack.currentFrame->offset_top;
636                     is_match = true;
637                     RRETURN;
638                 }
639                 
640                 /* In all other cases except a conditional group we have to check the
641                  group number back at the start and if necessary complete handling an
642                  extraction by setting the offsets and bumping the high water mark. */
643                 
644                 stack.currentFrame->number = *stack.currentFrame->prev - OP_BRA;
645                 
646                 /* For extended extraction brackets (large number), we have to fish out
647                  the number from a dummy opcode at the start. */
648                 
649                 if (stack.currentFrame->number > EXTRACT_BASIC_MAX)
650                     stack.currentFrame->number = GET2(stack.currentFrame->prev, 2+LINK_SIZE);
651                 stack.currentFrame->offset = stack.currentFrame->number << 1;
652                 
653 #ifdef DEBUG
654                 printf("end bracket %d", stack.currentFrame->number);
655                 printf("\n");
656 #endif
657                 
658                 /* Test for a numbered group. This includes groups called as a result
659                  of recursion. Note that whole-pattern recursion is coded as a recurse
660                  into group 0, so it won't be picked up here. Instead, we catch it when
661                  the OP_END is reached. */
662                 
663                 if (stack.currentFrame->number > 0) {
664                     if (stack.currentFrame->offset >= md->offset_max)
665                         md->offset_overflow = true;
666                     else {
667                         md->offset_vector[stack.currentFrame->offset] =
668                         md->offset_vector[md->offset_end - stack.currentFrame->number];
669                         md->offset_vector[stack.currentFrame->offset+1] = stack.currentFrame->eptr - md->start_subject;
670                         if (stack.currentFrame->offset_top <= stack.currentFrame->offset)
671                             stack.currentFrame->offset_top = stack.currentFrame->offset + 2;
672                     }
673                 }
674                 
675                 /* For a non-repeating ket, just continue at this level. This also
676                  happens for a repeating ket if no characters were matched in the group.
677                  This is the forcible breaking of infinite loops as implemented in Perl
678                  5.005. If there is an options reset, it will get obeyed in the normal
679                  course of events. */
680                 
681                 if (*stack.currentFrame->ecode == OP_KET || stack.currentFrame->eptr == stack.currentFrame->saved_eptr) {
682                     stack.currentFrame->ecode += 1 + LINK_SIZE;
683                     NEXT_OPCODE;
684                 }
685                 
686                 /* The repeating kets try the rest of the pattern or restart from the
687                  preceding bracket, in the appropriate order. */
688                 
689                 if (*stack.currentFrame->ecode == OP_KETRMIN) {
690                     RMATCH(16, stack.currentFrame->ecode + 1+LINK_SIZE, stack.currentFrame->eptrb, 0);
691                     if (is_match)
692                         RRETURN;
693                     RMATCH(17, stack.currentFrame->prev, stack.currentFrame->eptrb, match_isgroup);
694                     if (is_match)
695                         RRETURN;
696                 } else { /* OP_KETRMAX */
697                     RMATCH(18, stack.currentFrame->prev, stack.currentFrame->eptrb, match_isgroup);
698                     if (is_match)
699                         RRETURN;
700                     RMATCH(19, stack.currentFrame->ecode + 1+LINK_SIZE, stack.currentFrame->eptrb, 0);
701                     if (is_match)
702                         RRETURN;
703                 }
704                 RRETURN;
705                 
706                 /* Start of subject, or after internal newline if multiline. */
707                 
708                 BEGIN_OPCODE(CIRC):
709                 if (stack.currentFrame->eptr != md->start_subject && (!md->multiline || !isNewline(stack.currentFrame->eptr[-1])))
710                     RRETURN_NO_MATCH;
711                 stack.currentFrame->ecode++;
712                 NEXT_OPCODE;
713                 
714                 /* End of subject, or before internal newline if multiline. */
715                 
716                 BEGIN_OPCODE(DOLL):
717                 if (stack.currentFrame->eptr < md->end_subject && (!md->multiline || !isNewline(*stack.currentFrame->eptr)))
718                     RRETURN_NO_MATCH;
719                 stack.currentFrame->ecode++;
720                 NEXT_OPCODE;
721                 
722                 /* Word boundary assertions */
723                 
724                 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
725                 BEGIN_OPCODE(WORD_BOUNDARY):
726                 /* Find out if the previous and current characters are "word" characters.
727                  It takes a bit more work in UTF-8 mode. Characters > 128 are assumed to
728                  be "non-word" characters. */
729                 
730                 if (stack.currentFrame->eptr == md->start_subject)
731                     prev_is_word = false;
732                 else {
733                     const pcre_uchar *lastptr = stack.currentFrame->eptr - 1;
734                     while(ISMIDCHAR(*lastptr))
735                         lastptr--;
736                     GETCHAR(c, lastptr);
737                     prev_is_word = c < 128 && (md->ctypes[c] & ctype_word) != 0;
738                 }
739                 if (stack.currentFrame->eptr >= md->end_subject)
740                     cur_is_word = false;
741                 else {
742                     GETCHAR(c, stack.currentFrame->eptr);
743                     cur_is_word = c < 128 && (md->ctypes[c] & ctype_word) != 0;
744                 }
745                 
746                 /* Now see if the situation is what we want */
747                 
748                 if ((*stack.currentFrame->ecode++ == OP_WORD_BOUNDARY) ? cur_is_word == prev_is_word : cur_is_word != prev_is_word)
749                     RRETURN_NO_MATCH;
750                 NEXT_OPCODE;
751                 
752                 /* Match a single character type; inline for speed */
753                 
754                 BEGIN_OPCODE(ANY):
755                 if (stack.currentFrame->eptr < md->end_subject && isNewline(*stack.currentFrame->eptr))
756                     RRETURN_NO_MATCH;
757                 if (stack.currentFrame->eptr++ >= md->end_subject)
758                     RRETURN_NO_MATCH;
759                 while (stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr))
760                     stack.currentFrame->eptr++;
761                 stack.currentFrame->ecode++;
762                 NEXT_OPCODE;
763                 
764                 BEGIN_OPCODE(NOT_DIGIT):
765                 if (stack.currentFrame->eptr >= md->end_subject)
766                     RRETURN_NO_MATCH;
767                 GETCHARINCTEST(c, stack.currentFrame->eptr);
768                 if (isASCIIDigit(c))
769                     RRETURN_NO_MATCH;
770                 stack.currentFrame->ecode++;
771                 NEXT_OPCODE;
772                 
773                 BEGIN_OPCODE(DIGIT):
774                 if (stack.currentFrame->eptr >= md->end_subject)
775                     RRETURN_NO_MATCH;
776                 GETCHARINCTEST(c, stack.currentFrame->eptr);
777                 if (!isASCIIDigit(c))
778                     RRETURN_NO_MATCH;
779                 stack.currentFrame->ecode++;
780                 NEXT_OPCODE;
781                 
782                 BEGIN_OPCODE(NOT_WHITESPACE):
783                 if (stack.currentFrame->eptr >= md->end_subject)
784                     RRETURN_NO_MATCH;
785                 GETCHARINCTEST(c, stack.currentFrame->eptr);
786                 if (c < 128 && (md->ctypes[c] & ctype_space))
787                     RRETURN_NO_MATCH;
788                 stack.currentFrame->ecode++;
789                 NEXT_OPCODE;
790                 
791                 BEGIN_OPCODE(WHITESPACE):
792                 if (stack.currentFrame->eptr >= md->end_subject)
793                     RRETURN_NO_MATCH;
794                 GETCHARINCTEST(c, stack.currentFrame->eptr);
795                 if (c >= 128 || !(md->ctypes[c] & ctype_space))
796                     RRETURN_NO_MATCH;
797                 stack.currentFrame->ecode++;
798                 NEXT_OPCODE;
799                 
800                 BEGIN_OPCODE(NOT_WORDCHAR):
801                 if (stack.currentFrame->eptr >= md->end_subject)
802                     RRETURN_NO_MATCH;
803                 GETCHARINCTEST(c, stack.currentFrame->eptr);
804                 if (c < 128 && (md->ctypes[c] & ctype_word))
805                     RRETURN_NO_MATCH;
806                 stack.currentFrame->ecode++;
807                 NEXT_OPCODE;
808                 
809                 BEGIN_OPCODE(WORDCHAR):
810                 if (stack.currentFrame->eptr >= md->end_subject)
811                     RRETURN_NO_MATCH;
812                 GETCHARINCTEST(c, stack.currentFrame->eptr);
813                 if (c >= 128 || !(md->ctypes[c] & ctype_word))
814                     RRETURN_NO_MATCH;
815                 stack.currentFrame->ecode++;
816                 NEXT_OPCODE;
817                 
818                 /* Match a back reference, possibly repeatedly. Look past the end of the
819                  item to see if there is repeat information following. The code is similar
820                  to that for character classes, but repeated for efficiency. Then obey
821                  similar code to character type repeats - written out again for speed.
822                  However, if the referenced string is the empty string, always treat
823                  it as matched, any number of times (otherwise there could be infinite
824                  loops). */
825                 
826                 BEGIN_OPCODE(REF):
827                 stack.currentFrame->offset = GET2(stack.currentFrame->ecode, 1) << 1;               /* Doubled ref number */
828                 stack.currentFrame->ecode += 3;                                 /* Advance past item */
829                 
830                 /* If the reference is unset, set the length to be longer than the amount
831                  of subject left; this ensures that every attempt at a match fails. We
832                  can't just fail here, because of the possibility of quantifiers with zero
833                  minima. */
834                 
835                 if (stack.currentFrame->offset >= stack.currentFrame->offset_top || md->offset_vector[stack.currentFrame->offset] < 0)
836                     stack.currentFrame->length = 0;
837                 else
838                     stack.currentFrame->length = md->offset_vector[stack.currentFrame->offset+1] - md->offset_vector[stack.currentFrame->offset];
839                 
840                 /* Set up for repetition, or handle the non-repeated case */
841                 
842                 switch (*stack.currentFrame->ecode) {
843                 case OP_CRSTAR:
844                 case OP_CRMINSTAR:
845                 case OP_CRPLUS:
846                 case OP_CRMINPLUS:
847                 case OP_CRQUERY:
848                 case OP_CRMINQUERY:
849                     c = *stack.currentFrame->ecode++ - OP_CRSTAR;
850                     minimize = (c & 1) != 0;
851                     min = rep_min[c];                 /* Pick up values from tables; */
852                     stack.currentFrame->max = rep_max[c];                 /* zero for max => infinity */
853                     if (stack.currentFrame->max == 0)
854                         stack.currentFrame->max = INT_MAX;
855                     break;
856                     
857                 case OP_CRRANGE:
858                 case OP_CRMINRANGE:
859                     minimize = (*stack.currentFrame->ecode == OP_CRMINRANGE);
860                     min = GET2(stack.currentFrame->ecode, 1);
861                     stack.currentFrame->max = GET2(stack.currentFrame->ecode, 3);
862                     if (stack.currentFrame->max == 0)
863                         stack.currentFrame->max = INT_MAX;
864                     stack.currentFrame->ecode += 5;
865                     break;
866                 
867                 default:               /* No repeat follows */
868                     if (!match_ref(stack.currentFrame->offset, stack.currentFrame->eptr, stack.currentFrame->length, md))
869                         RRETURN_NO_MATCH;
870                     stack.currentFrame->eptr += stack.currentFrame->length;
871                     NEXT_OPCODE;
872                 }
873                 
874                 /* If the length of the reference is zero, just continue with the
875                  main loop. */
876                 
877                 if (stack.currentFrame->length == 0)
878                     NEXT_OPCODE;
879                 
880                 /* First, ensure the minimum number of matches are present. */
881                 
882                 for (i = 1; i <= min; i++) {
883                     if (!match_ref(stack.currentFrame->offset, stack.currentFrame->eptr, stack.currentFrame->length, md))
884                         RRETURN_NO_MATCH;
885                     stack.currentFrame->eptr += stack.currentFrame->length;
886                 }
887                 
888                 /* If min = max, continue at the same level without recursion.
889                  They are not both allowed to be zero. */
890                 
891                 if (min == stack.currentFrame->max)
892                     NEXT_OPCODE;
893                 
894                 /* If minimizing, keep trying and advancing the pointer */
895                 
896                 if (minimize) {
897                     for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
898                         RMATCH(20, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
899                         if (is_match)
900                             RRETURN;
901                         if (stack.currentFrame->fi >= stack.currentFrame->max || !match_ref(stack.currentFrame->offset, stack.currentFrame->eptr, stack.currentFrame->length, md))
902                             RRETURN;
903                         stack.currentFrame->eptr += stack.currentFrame->length;
904                     }
905                     ASSERT_NOT_REACHED();
906                 }
907                 
908                 /* If maximizing, find the longest string and work backwards */
909                 
910                 else {
911                     stack.currentFrame->pp = stack.currentFrame->eptr;
912                     for (i = min; i < stack.currentFrame->max; i++) {
913                         if (!match_ref(stack.currentFrame->offset, stack.currentFrame->eptr, stack.currentFrame->length, md))
914                             break;
915                         stack.currentFrame->eptr += stack.currentFrame->length;
916                     }
917                     while (stack.currentFrame->eptr >= stack.currentFrame->pp) {
918                         RMATCH(21, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
919                         if (is_match)
920                             RRETURN;
921                         stack.currentFrame->eptr -= stack.currentFrame->length;
922                     }
923                     RRETURN_NO_MATCH;
924                 }
925                 ASSERT_NOT_REACHED();
926                 
927                 /* Match a bit-mapped character class, possibly repeatedly. This op code is
928                  used when all the characters in the class have values in the range 0-255,
929                  and either the matching is caseful, or the characters are in the range
930                  0-127 when UTF-8 processing is enabled. The only difference between
931                  OP_CLASS and OP_NCLASS occurs when a data character outside the range is
932                  encountered.
933                  
934                  First, look past the end of the item to see if there is repeat information
935                  following. Then obey similar code to character type repeats - written out
936                  again for speed. */
937                 
938                 BEGIN_OPCODE(NCLASS):
939                 BEGIN_OPCODE(CLASS):
940                 stack.currentFrame->data = stack.currentFrame->ecode + 1;                /* Save for matching */
941                 stack.currentFrame->ecode += 33;                     /* Advance past the item */
942                 
943                 switch (*stack.currentFrame->ecode) {
944                 case OP_CRSTAR:
945                 case OP_CRMINSTAR:
946                 case OP_CRPLUS:
947                 case OP_CRMINPLUS:
948                 case OP_CRQUERY:
949                 case OP_CRMINQUERY:
950                     c = *stack.currentFrame->ecode++ - OP_CRSTAR;
951                     minimize = (c & 1) != 0;
952                     min = rep_min[c];                 /* Pick up values from tables; */
953                     stack.currentFrame->max = rep_max[c];                 /* zero for max => infinity */
954                     if (stack.currentFrame->max == 0)
955                         stack.currentFrame->max = INT_MAX;
956                     break;
957                     
958                 case OP_CRRANGE:
959                 case OP_CRMINRANGE:
960                     minimize = (*stack.currentFrame->ecode == OP_CRMINRANGE);
961                     min = GET2(stack.currentFrame->ecode, 1);
962                     stack.currentFrame->max = GET2(stack.currentFrame->ecode, 3);
963                     if (stack.currentFrame->max == 0)
964                         stack.currentFrame->max = INT_MAX;
965                     stack.currentFrame->ecode += 5;
966                     break;
967                     
968                 default:               /* No repeat follows */
969                     min = stack.currentFrame->max = 1;
970                     break;
971                 }
972                 
973                 /* First, ensure the minimum number of matches are present. */
974                 
975                 for (i = 1; i <= min; i++) {
976                     if (stack.currentFrame->eptr >= md->end_subject)
977                         RRETURN_NO_MATCH;
978                     GETCHARINC(c, stack.currentFrame->eptr);
979                     if (c > 255) {
980                         if (stack.currentFrame->data[-1] == OP_CLASS)
981                             RRETURN_NO_MATCH;
982                     } else {
983                         if ((stack.currentFrame->data[c/8] & (1 << (c&7))) == 0)
984                             RRETURN_NO_MATCH;
985                     }
986                 }
987                 
988                 /* If max == min we can continue with the main loop without the
989                  need to recurse. */
990                 
991                 if (min == stack.currentFrame->max)
992                     NEXT_OPCODE;      
993                 
994                 /* If minimizing, keep testing the rest of the expression and advancing
995                  the pointer while it matches the class. */
996                 if (minimize) {
997                     {
998                         for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
999                             RMATCH(22, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1000                             if (is_match)
1001                                 RRETURN;
1002                             if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1003                                 RRETURN;
1004                             GETCHARINC(c, stack.currentFrame->eptr);
1005                             if (c > 255) {
1006                                 if (stack.currentFrame->data[-1] == OP_CLASS)
1007                                     RRETURN;
1008                             } else {
1009                                 if ((stack.currentFrame->data[c/8] & (1 << (c&7))) == 0)
1010                                     RRETURN;
1011                             }
1012                         }
1013                     }
1014                     ASSERT_NOT_REACHED();
1015                 }
1016                 /* If maximizing, find the longest possible run, then work backwards. */
1017                 else {
1018                     stack.currentFrame->pp = stack.currentFrame->eptr;
1019                     
1020                     for (i = min; i < stack.currentFrame->max; i++) {
1021                         int len = 1;
1022                         if (stack.currentFrame->eptr >= md->end_subject)
1023                             break;
1024                         GETCHARLEN(c, stack.currentFrame->eptr, len);
1025                         if (c > 255) {
1026                             if (stack.currentFrame->data[-1] == OP_CLASS)
1027                                 break;
1028                         } else {
1029                             if ((stack.currentFrame->data[c/8] & (1 << (c&7))) == 0)
1030                                 break;
1031                         }
1032                         stack.currentFrame->eptr += len;
1033                     }
1034                     for (;;) {
1035                         RMATCH(24, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1036                         if (is_match)
1037                             RRETURN;
1038                         if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1039                             break;        /* Stop if tried at original pos */
1040                         BACKCHAR(stack.currentFrame->eptr);
1041                     }
1042                     
1043                     RRETURN;
1044                 }
1045                 ASSERT_NOT_REACHED();
1046                 
1047                 /* Match an extended character class. This opcode is encountered only
1048                  in UTF-8 mode, because that's the only time it is compiled. */
1049                 
1050                 BEGIN_OPCODE(XCLASS):
1051                 stack.currentFrame->data = stack.currentFrame->ecode + 1 + LINK_SIZE;                /* Save for matching */
1052                 stack.currentFrame->ecode += GET(stack.currentFrame->ecode, 1);                      /* Advance past the item */
1053                 
1054                 switch (*stack.currentFrame->ecode) {
1055                 case OP_CRSTAR:
1056                 case OP_CRMINSTAR:
1057                 case OP_CRPLUS:
1058                 case OP_CRMINPLUS:
1059                 case OP_CRQUERY:
1060                 case OP_CRMINQUERY:
1061                     c = *stack.currentFrame->ecode++ - OP_CRSTAR;
1062                     minimize = (c & 1) != 0;
1063                     min = rep_min[c];                 /* Pick up values from tables; */
1064                     stack.currentFrame->max = rep_max[c];                 /* zero for max => infinity */
1065                     if (stack.currentFrame->max == 0)
1066                         stack.currentFrame->max = INT_MAX;
1067                     break;
1068                     
1069                 case OP_CRRANGE:
1070                 case OP_CRMINRANGE:
1071                     minimize = (*stack.currentFrame->ecode == OP_CRMINRANGE);
1072                     min = GET2(stack.currentFrame->ecode, 1);
1073                     stack.currentFrame->max = GET2(stack.currentFrame->ecode, 3);
1074                     if (stack.currentFrame->max == 0)
1075                         stack.currentFrame->max = INT_MAX;
1076                     stack.currentFrame->ecode += 5;
1077                     break;
1078                     
1079                 default:               /* No repeat follows */
1080                     min = stack.currentFrame->max = 1;
1081             }
1082                 
1083                 /* First, ensure the minimum number of matches are present. */
1084                 
1085                 for (i = 1; i <= min; i++) {
1086                     if (stack.currentFrame->eptr >= md->end_subject)
1087                         RRETURN_NO_MATCH;
1088                     GETCHARINC(c, stack.currentFrame->eptr);
1089                     if (!_pcre_xclass(c, stack.currentFrame->data))
1090                         RRETURN_NO_MATCH;
1091                 }
1092                 
1093                 /* If max == min we can continue with the main loop without the
1094                  need to recurse. */
1095                 
1096                 if (min == stack.currentFrame->max)
1097                     NEXT_OPCODE;
1098                 
1099                 /* If minimizing, keep testing the rest of the expression and advancing
1100                  the pointer while it matches the class. */
1101                 
1102                 if (minimize) {
1103                     for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1104                         RMATCH(26, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1105                         if (is_match)
1106                             RRETURN;
1107                         if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1108                             RRETURN;
1109                         GETCHARINC(c, stack.currentFrame->eptr);
1110                         if (!_pcre_xclass(c, stack.currentFrame->data))
1111                             RRETURN;
1112                     }
1113                     ASSERT_NOT_REACHED();
1114                 }
1115                 
1116                 /* If maximizing, find the longest possible run, then work backwards. */
1117                 
1118                 else {
1119                     stack.currentFrame->pp = stack.currentFrame->eptr;
1120                     for (i = min; i < stack.currentFrame->max; i++) {
1121                         int len = 1;
1122                         if (stack.currentFrame->eptr >= md->end_subject)
1123                             break;
1124                         GETCHARLEN(c, stack.currentFrame->eptr, len);
1125                         if (!_pcre_xclass(c, stack.currentFrame->data))
1126                             break;
1127                         stack.currentFrame->eptr += len;
1128                     }
1129                     for(;;) {
1130                         RMATCH(27, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1131                         if (is_match)
1132                             RRETURN;
1133                         if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1134                             break;        /* Stop if tried at original pos */
1135                         BACKCHAR(stack.currentFrame->eptr)
1136                     }
1137                     RRETURN;
1138                 }
1139                 
1140                 ASSERT_NOT_REACHED();
1141                 
1142                 /* Match a single character, casefully */
1143                 
1144                 BEGIN_OPCODE(CHAR):
1145                 stack.currentFrame->length = 1;
1146                 stack.currentFrame->ecode++;
1147                 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);
1148             {
1149                 int dc;
1150                 stack.currentFrame->ecode += stack.currentFrame->length;
1151                 switch (md->end_subject - stack.currentFrame->eptr) {
1152                 case 0:
1153                     RRETURN_NO_MATCH;
1154                 case 1:
1155                     dc = *stack.currentFrame->eptr++;
1156                     if (IS_LEADING_SURROGATE(dc))
1157                         RRETURN_NO_MATCH;
1158                     break;
1159                     default:
1160                     GETCHARINC(dc, stack.currentFrame->eptr);
1161                 }
1162                 if (stack.currentFrame->fc != dc)
1163                     RRETURN_NO_MATCH;
1164             }
1165                 NEXT_OPCODE;
1166                 
1167                 /* Match a single character, caselessly */
1168                 
1169                 BEGIN_OPCODE(CHARNC):
1170                 stack.currentFrame->length = 1;
1171                 stack.currentFrame->ecode++;
1172                 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);
1173                 
1174                 if (md->end_subject - stack.currentFrame->eptr == 0)
1175                     RRETURN_NO_MATCH;
1176                 
1177             {
1178                 int dc;
1179                 if (md->end_subject - stack.currentFrame->eptr == 1) {
1180                     dc = *stack.currentFrame->eptr++;
1181                     if (IS_LEADING_SURROGATE(dc))
1182                         RRETURN_NO_MATCH;
1183                 } else
1184                     GETCHARINC(dc, stack.currentFrame->eptr);
1185                 stack.currentFrame->ecode += stack.currentFrame->length;
1186                 
1187                 /* If we have Unicode property support, we can use it to test the other
1188                  case of the character, if there is one. */
1189                 
1190                 if (stack.currentFrame->fc != dc) {
1191                     if (dc != _pcre_ucp_othercase(stack.currentFrame->fc))
1192                         RRETURN_NO_MATCH;
1193                 }
1194             }
1195                 NEXT_OPCODE;
1196                 
1197                 /* Match a single ASCII character. */
1198                 
1199                 BEGIN_OPCODE(ASCII_CHAR):
1200                 if (md->end_subject == stack.currentFrame->eptr)
1201                     RRETURN_NO_MATCH;
1202                 if (*stack.currentFrame->eptr != stack.currentFrame->ecode[1])
1203                     RRETURN_NO_MATCH;
1204                 ++stack.currentFrame->eptr;
1205                 stack.currentFrame->ecode += 2;
1206                 NEXT_OPCODE;
1207                 
1208                 /* Match one of two cases of an ASCII character. */
1209                 
1210                 BEGIN_OPCODE(ASCII_LETTER_NC):
1211                 if (md->end_subject == stack.currentFrame->eptr)
1212                     RRETURN_NO_MATCH;
1213                 if ((*stack.currentFrame->eptr | 0x20) != stack.currentFrame->ecode[1])
1214                     RRETURN_NO_MATCH;
1215                 ++stack.currentFrame->eptr;
1216                 stack.currentFrame->ecode += 2;
1217                 NEXT_OPCODE;
1218                 
1219                 /* Match a single character repeatedly; different opcodes share code. */
1220                 
1221                 BEGIN_OPCODE(EXACT):
1222                 min = stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1223                 minimize = false;
1224                 stack.currentFrame->ecode += 3;
1225                 goto REPEATCHAR;
1226                 
1227                 BEGIN_OPCODE(UPTO):
1228                 BEGIN_OPCODE(MINUPTO):
1229                 min = 0;
1230                 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1231                 minimize = *stack.currentFrame->ecode == OP_MINUPTO;
1232                 stack.currentFrame->ecode += 3;
1233                 goto REPEATCHAR;
1234                 
1235                 BEGIN_OPCODE(STAR):
1236                 BEGIN_OPCODE(MINSTAR):
1237                 BEGIN_OPCODE(PLUS):
1238                 BEGIN_OPCODE(MINPLUS):
1239                 BEGIN_OPCODE(QUERY):
1240                 BEGIN_OPCODE(MINQUERY):
1241                 c = *stack.currentFrame->ecode++ - OP_STAR;
1242                 minimize = (c & 1) != 0;
1243                 min = rep_min[c];                 /* Pick up values from tables; */
1244                 stack.currentFrame->max = rep_max[c];                 /* zero for max => infinity */
1245                 if (stack.currentFrame->max == 0)
1246                     stack.currentFrame->max = INT_MAX;
1247                 
1248                 /* Common code for all repeated single-character matches. We can give
1249                  up quickly if there are fewer than the minimum number of characters left in
1250                  the subject. */
1251                 
1252             REPEATCHAR:
1253                 
1254                 stack.currentFrame->length = 1;
1255                 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);
1256                 if (min * (stack.currentFrame->fc > 0xFFFF ? 2 : 1) > md->end_subject - stack.currentFrame->eptr)
1257                     RRETURN_NO_MATCH;
1258                 stack.currentFrame->ecode += stack.currentFrame->length;
1259                 
1260                 if (stack.currentFrame->fc <= 0xFFFF) {
1261                     int othercase = md->caseless ? _pcre_ucp_othercase(stack.currentFrame->fc) : -1;
1262                     
1263                     for (i = 1; i <= min; i++) {
1264                         if (*stack.currentFrame->eptr != stack.currentFrame->fc && *stack.currentFrame->eptr != othercase)
1265                             RRETURN_NO_MATCH;
1266                         ++stack.currentFrame->eptr;
1267                     }
1268                     
1269                     if (min == stack.currentFrame->max)
1270                         NEXT_OPCODE;
1271                     
1272                     if (minimize) {
1273                         stack.currentFrame->repeat_othercase = othercase;
1274                         for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1275                             RMATCH(28, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1276                             if (is_match)
1277                                 RRETURN;
1278                             if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1279                                 RRETURN;
1280                             if (*stack.currentFrame->eptr != stack.currentFrame->fc && *stack.currentFrame->eptr != stack.currentFrame->repeat_othercase)
1281                                 RRETURN;
1282                             ++stack.currentFrame->eptr;
1283                         }
1284                         ASSERT_NOT_REACHED();
1285                     } else {
1286                         stack.currentFrame->pp = stack.currentFrame->eptr;
1287                         for (i = min; i < stack.currentFrame->max; i++) {
1288                             if (stack.currentFrame->eptr >= md->end_subject)
1289                                 break;
1290                             if (*stack.currentFrame->eptr != stack.currentFrame->fc && *stack.currentFrame->eptr != othercase)
1291                                 break;
1292                             ++stack.currentFrame->eptr;
1293                         }
1294                         while (stack.currentFrame->eptr >= stack.currentFrame->pp) {
1295                             RMATCH(29, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1296                             if (is_match)
1297                                 RRETURN;
1298                             --stack.currentFrame->eptr;
1299                         }
1300                         RRETURN_NO_MATCH;
1301                     }
1302                     ASSERT_NOT_REACHED();
1303                 } else {
1304                     /* No case on surrogate pairs, so no need to bother with "othercase". */
1305                     
1306                     for (i = 1; i <= min; i++) {
1307                         int nc;
1308                         GETCHAR(nc, stack.currentFrame->eptr);
1309                         if (nc != stack.currentFrame->fc)
1310                             RRETURN_NO_MATCH;
1311                         stack.currentFrame->eptr += 2;
1312                     }
1313                     
1314                     if (min == stack.currentFrame->max)
1315                         NEXT_OPCODE;
1316                     
1317                     if (minimize) {
1318                         for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1319                             int nc;
1320                             RMATCH(30, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1321                             if (is_match)
1322                                 RRETURN;
1323                             if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1324                                 RRETURN;
1325                             GETCHAR(nc, stack.currentFrame->eptr);
1326                             if (*stack.currentFrame->eptr != stack.currentFrame->fc)
1327                                 RRETURN;
1328                             stack.currentFrame->eptr += 2;
1329                         }
1330                         ASSERT_NOT_REACHED();
1331                     } else {
1332                         stack.currentFrame->pp = stack.currentFrame->eptr;
1333                         for (i = min; i < stack.currentFrame->max; i++) {
1334                             int nc;
1335                             if (stack.currentFrame->eptr > md->end_subject - 2)
1336                                 break;
1337                             GETCHAR(nc, stack.currentFrame->eptr);
1338                             if (*stack.currentFrame->eptr != stack.currentFrame->fc)
1339                                 break;
1340                             stack.currentFrame->eptr += 2;
1341                         }
1342                         while (stack.currentFrame->eptr >= stack.currentFrame->pp) {
1343                             RMATCH(31, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1344                             if (is_match)
1345                                 RRETURN;
1346                             stack.currentFrame->eptr -= 2;
1347                         }
1348                         RRETURN_NO_MATCH;
1349                     }
1350                     ASSERT_NOT_REACHED();
1351                 }
1352                 ASSERT_NOT_REACHED();
1353                 
1354                 /* Match a negated single one-byte character. The character we are
1355                  checking can be multibyte. */
1356                 
1357                 BEGIN_OPCODE(NOT):
1358                 if (stack.currentFrame->eptr >= md->end_subject)
1359                     RRETURN_NO_MATCH;
1360                 stack.currentFrame->ecode++;
1361                 GETCHARINCTEST(c, stack.currentFrame->eptr);
1362                 if (md->caseless) {
1363                     if (c < 128)
1364                         c = md->lcc[c];
1365                     if (md->lcc[*stack.currentFrame->ecode++] == c)
1366                         RRETURN_NO_MATCH;
1367                 } else {
1368                     if (*stack.currentFrame->ecode++ == c)
1369                         RRETURN_NO_MATCH;
1370                 }
1371                 NEXT_OPCODE;
1372                 
1373                 /* Match a negated single one-byte character repeatedly. This is almost a
1374                  repeat of the code for a repeated single character, but I haven't found a
1375                  nice way of commoning these up that doesn't require a test of the
1376                  positive/negative option for each character match. Maybe that wouldn't add
1377                  very much to the time taken, but character matching *is* what this is all
1378                  about... */
1379                 
1380                 BEGIN_OPCODE(NOTEXACT):
1381                 min = stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1382                 minimize = false;
1383                 stack.currentFrame->ecode += 3;
1384                 goto REPEATNOTCHAR;
1385                 
1386                 BEGIN_OPCODE(NOTUPTO):
1387                 BEGIN_OPCODE(NOTMINUPTO):
1388                 min = 0;
1389                 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1390                 minimize = *stack.currentFrame->ecode == OP_NOTMINUPTO;
1391                 stack.currentFrame->ecode += 3;
1392                 goto REPEATNOTCHAR;
1393                 
1394                 BEGIN_OPCODE(NOTSTAR):
1395                 BEGIN_OPCODE(NOTMINSTAR):
1396                 BEGIN_OPCODE(NOTPLUS):
1397                 BEGIN_OPCODE(NOTMINPLUS):
1398                 BEGIN_OPCODE(NOTQUERY):
1399                 BEGIN_OPCODE(NOTMINQUERY):
1400                 c = *stack.currentFrame->ecode++ - OP_NOTSTAR;
1401                 minimize = (c & 1) != 0;
1402                 min = rep_min[c];                 /* Pick up values from tables; */
1403                 stack.currentFrame->max = rep_max[c];                 /* zero for max => infinity */
1404                 if (stack.currentFrame->max == 0) stack.currentFrame->max = INT_MAX;
1405                 
1406                 /* Common code for all repeated single-byte matches. We can give up quickly
1407                  if there are fewer than the minimum number of bytes left in the
1408                  subject. */
1409                 
1410             REPEATNOTCHAR:
1411                 if (min > md->end_subject - stack.currentFrame->eptr)
1412                     RRETURN_NO_MATCH;
1413                 stack.currentFrame->fc = *stack.currentFrame->ecode++;
1414                 
1415                 /* The code is duplicated for the caseless and caseful cases, for speed,
1416                  since matching characters is likely to be quite common. First, ensure the
1417                  minimum number of matches are present. If min = max, continue at the same
1418                  level without recursing. Otherwise, if minimizing, keep trying the rest of
1419                  the expression and advancing one matching character if failing, up to the
1420                  maximum. Alternatively, if maximizing, find the maximum number of
1421                  characters and work backwards. */
1422                 
1423                 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->fc, min, stack.currentFrame->max));
1424                 
1425                 if (md->caseless) {
1426                     if (stack.currentFrame->fc < 128)
1427                         stack.currentFrame->fc = md->lcc[stack.currentFrame->fc];
1428                     
1429                     {
1430                         register int d;
1431                         for (i = 1; i <= min; i++) {
1432                             GETCHARINC(d, stack.currentFrame->eptr);
1433                             if (d < 128)
1434                                 d = md->lcc[d];
1435                             if (stack.currentFrame->fc == d)
1436                                 RRETURN_NO_MATCH;
1437                         }
1438                     }
1439                     
1440                     if (min == stack.currentFrame->max)
1441                         NEXT_OPCODE;      
1442                     
1443                     if (minimize) {
1444                         register int d;
1445                         for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1446                             RMATCH(38, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1447                             if (is_match)
1448                                 RRETURN;
1449                             GETCHARINC(d, stack.currentFrame->eptr);
1450                             if (d < 128)
1451                                 d = md->lcc[d];
1452                             if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject || stack.currentFrame->fc == d)
1453                                 RRETURN;
1454                         }
1455                         ASSERT_NOT_REACHED();
1456                     }
1457                     
1458                     /* Maximize case */
1459                     
1460                     else {
1461                         stack.currentFrame->pp = stack.currentFrame->eptr;
1462                         
1463                         {
1464                             register int d;
1465                             for (i = min; i < stack.currentFrame->max; i++) {
1466                                 int len = 1;
1467                                 if (stack.currentFrame->eptr >= md->end_subject)
1468                                     break;
1469                                 GETCHARLEN(d, stack.currentFrame->eptr, len);
1470                                 if (d < 128)
1471                                     d = md->lcc[d];
1472                                 if (stack.currentFrame->fc == d)
1473                                     break;
1474                                 stack.currentFrame->eptr += len;
1475                             }
1476                             for (;;) {
1477                                 RMATCH(40, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1478                                 if (is_match)
1479                                     RRETURN;
1480                                 if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1481                                     break;        /* Stop if tried at original pos */
1482                                 BACKCHAR(stack.currentFrame->eptr);
1483                             }
1484                         }
1485                         
1486                         RRETURN;
1487                     }
1488                     ASSERT_NOT_REACHED();
1489                 }
1490                 
1491                 /* Caseful comparisons */
1492                 
1493                 else {
1494                     {
1495                         register int d;
1496                         for (i = 1; i <= min; i++) {
1497                             GETCHARINC(d, stack.currentFrame->eptr);
1498                             if (stack.currentFrame->fc == d)
1499                                 RRETURN_NO_MATCH;
1500                         }
1501                     }
1502                     
1503                     if (min == stack.currentFrame->max)
1504                         NEXT_OPCODE;
1505                     
1506                     if (minimize) {
1507                         register int d;
1508                         for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1509                             RMATCH(42, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1510                             if (is_match)
1511                                 RRETURN;
1512                             GETCHARINC(d, stack.currentFrame->eptr);
1513                             if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject || stack.currentFrame->fc == d)
1514                                 RRETURN;
1515                         }
1516                         ASSERT_NOT_REACHED();
1517                     }
1518                     
1519                     /* Maximize case */
1520                     
1521                     else {
1522                         stack.currentFrame->pp = stack.currentFrame->eptr;
1523                         
1524                         {
1525                             register int d;
1526                             for (i = min; i < stack.currentFrame->max; i++) {
1527                                 int len = 1;
1528                                 if (stack.currentFrame->eptr >= md->end_subject)
1529                                     break;
1530                                 GETCHARLEN(d, stack.currentFrame->eptr, len);
1531                                 if (stack.currentFrame->fc == d)
1532                                     break;
1533                                 stack.currentFrame->eptr += len;
1534                             }
1535                             for (;;) {
1536                                 RMATCH(44, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1537                                 if (is_match)
1538                                     RRETURN;
1539                                 if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1540                                     break;        /* Stop if tried at original pos */
1541                                 BACKCHAR(stack.currentFrame->eptr);
1542                             }
1543                         }
1544                         
1545                         RRETURN;
1546                     }
1547                 }
1548                 ASSERT_NOT_REACHED();
1549                 
1550                 /* Match a single character type repeatedly; several different opcodes
1551                  share code. This is very similar to the code for single characters, but we
1552                  repeat it in the interests of efficiency. */
1553                 
1554                 BEGIN_OPCODE(TYPEEXACT):
1555                 min = stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1556                 minimize = true;
1557                 stack.currentFrame->ecode += 3;
1558                 goto REPEATTYPE;
1559                 
1560                 BEGIN_OPCODE(TYPEUPTO):
1561                 BEGIN_OPCODE(TYPEMINUPTO):
1562                 min = 0;
1563                 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1564                 minimize = *stack.currentFrame->ecode == OP_TYPEMINUPTO;
1565                 stack.currentFrame->ecode += 3;
1566                 goto REPEATTYPE;
1567                 
1568                 BEGIN_OPCODE(TYPESTAR):
1569                 BEGIN_OPCODE(TYPEMINSTAR):
1570                 BEGIN_OPCODE(TYPEPLUS):
1571                 BEGIN_OPCODE(TYPEMINPLUS):
1572                 BEGIN_OPCODE(TYPEQUERY):
1573                 BEGIN_OPCODE(TYPEMINQUERY):
1574                 c = *stack.currentFrame->ecode++ - OP_TYPESTAR;
1575                 minimize = (c & 1) != 0;
1576                 min = rep_min[c];                 /* Pick up values from tables; */
1577                 stack.currentFrame->max = rep_max[c];                 /* zero for max => infinity */
1578                 if (stack.currentFrame->max == 0)
1579                     stack.currentFrame->max = INT_MAX;
1580                 
1581                 /* Common code for all repeated single character type matches. Note that
1582                  in UTF-8 mode, '.' matches a character of any length, but for the other
1583                  character types, the valid characters are all one-byte long. */
1584                 
1585             REPEATTYPE:
1586                 stack.currentFrame->ctype = *stack.currentFrame->ecode++;      /* Code for the character type */
1587                 
1588                 /* First, ensure the minimum number of matches are present. Use inline
1589                  code for maximizing the speed, and do the type test once at the start
1590                  (i.e. keep it out of the loop). Also we can test that there are at least
1591                  the minimum number of bytes before we start. This isn't as effective in
1592                  UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
1593                  is tidier. Also separate the UCP code, which can be the same for both UTF-8
1594                  and single-bytes. */
1595                 
1596                 if (min > md->end_subject - stack.currentFrame->eptr)
1597                     RRETURN_NO_MATCH;
1598                 if (min > 0) {
1599                     switch(stack.currentFrame->ctype) {
1600                         case OP_ANY:
1601                             for (i = 1; i <= min; i++) {
1602                                 if (stack.currentFrame->eptr >= md->end_subject || isNewline(*stack.currentFrame->eptr))
1603                                     RRETURN_NO_MATCH;
1604                                 ++stack.currentFrame->eptr;
1605                                 while (stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr))
1606                                     stack.currentFrame->eptr++;
1607                             }
1608                             break;
1609                             
1610                             case OP_NOT_DIGIT:
1611                             for (i = 1; i <= min; i++) {
1612                                 if (stack.currentFrame->eptr >= md->end_subject)
1613                                     RRETURN_NO_MATCH;
1614                                 GETCHARINC(c, stack.currentFrame->eptr);
1615                                 if (isASCIIDigit(c))
1616                                     RRETURN_NO_MATCH;
1617                             }
1618                             break;
1619                             
1620                             case OP_DIGIT:
1621                             for (i = 1; i <= min; i++) {
1622                                 if (stack.currentFrame->eptr >= md->end_subject || !isASCIIDigit(*stack.currentFrame->eptr++))
1623                                     RRETURN_NO_MATCH;
1624                                 /* No need to skip more bytes - we know it's a 1-byte character */
1625                             }
1626                             break;
1627                             
1628                             case OP_NOT_WHITESPACE:
1629                             for (i = 1; i <= min; i++) {
1630                                 if (stack.currentFrame->eptr >= md->end_subject ||
1631                                     (*stack.currentFrame->eptr < 128 && (md->ctypes[*stack.currentFrame->eptr] & ctype_space) != 0))
1632                                     RRETURN_NO_MATCH;
1633                                 while (++stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr));
1634                             }
1635                             break;
1636                             
1637                             case OP_WHITESPACE:
1638                             for (i = 1; i <= min; i++) {
1639                                 if (stack.currentFrame->eptr >= md->end_subject ||
1640                                     *stack.currentFrame->eptr >= 128 || (md->ctypes[*stack.currentFrame->eptr++] & ctype_space) == 0)
1641                                     RRETURN_NO_MATCH;
1642                                 /* No need to skip more bytes - we know it's a 1-byte character */
1643                             }
1644                             break;
1645                             
1646                             case OP_NOT_WORDCHAR:
1647                             for (i = 1; i <= min; i++) {
1648                                 if (stack.currentFrame->eptr >= md->end_subject ||
1649                                     (*stack.currentFrame->eptr < 128 && (md->ctypes[*stack.currentFrame->eptr] & ctype_word) != 0))
1650                                     RRETURN_NO_MATCH;
1651                                 while (++stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr));
1652                             }
1653                             break;
1654                             
1655                             case OP_WORDCHAR:
1656                             for (i = 1; i <= min; i++) {
1657                                 if (stack.currentFrame->eptr >= md->end_subject ||
1658                                     *stack.currentFrame->eptr >= 128 || (md->ctypes[*stack.currentFrame->eptr++] & ctype_word) == 0)
1659                                     RRETURN_NO_MATCH;
1660                                 /* No need to skip more bytes - we know it's a 1-byte character */
1661                             }
1662                             break;
1663                             
1664                             default:
1665                             ASSERT_NOT_REACHED();
1666                             RRETURN_ERROR(JSRegExpErrorInternal);
1667                     }  /* End switch(stack.currentFrame->ctype) */
1668                 }
1669                 
1670                 /* If min = max, continue at the same level without recursing */
1671                 
1672                 if (min == stack.currentFrame->max)
1673                     NEXT_OPCODE;    
1674                 
1675                 /* If minimizing, we have to test the rest of the pattern before each
1676                  subsequent match. */
1677                 
1678                 if (minimize) {
1679                     for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1680                         RMATCH(48, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1681                         if (is_match)
1682                             RRETURN;
1683                         if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1684                             RRETURN;
1685                         
1686                         GETCHARINC(c, stack.currentFrame->eptr);
1687                         switch(stack.currentFrame->ctype) {
1688                         case OP_ANY:
1689                             if (isNewline(c))
1690                                 RRETURN;
1691                             break;
1692                             
1693                         case OP_NOT_DIGIT:
1694                             if (isASCIIDigit(c))
1695                                 RRETURN;
1696                             break;
1697                             
1698                         case OP_DIGIT:
1699                             if (!isASCIIDigit(c))
1700                                 RRETURN;
1701                             break;
1702                             
1703                         case OP_NOT_WHITESPACE:
1704                             if (c < 128 && (md->ctypes[c] & ctype_space))
1705                                 RRETURN;
1706                             break;
1707                             
1708                         case OP_WHITESPACE:
1709                             if  (c >= 128 || !(md->ctypes[c] & ctype_space))
1710                                 RRETURN;
1711                             break;
1712                             
1713                         case OP_NOT_WORDCHAR:
1714                             if (c < 128 && (md->ctypes[c] & ctype_word))
1715                                 RRETURN;
1716                             break;
1717                             
1718                         case OP_WORDCHAR:
1719                             if (c >= 128 || !(md->ctypes[c] & ctype_word))
1720                                 RRETURN;
1721                             break;
1722                             
1723                         default:
1724                             ASSERT_NOT_REACHED();
1725                             RRETURN_ERROR(JSRegExpErrorInternal);
1726                         }
1727                     }
1728                     ASSERT_NOT_REACHED();
1729                 }
1730                 
1731                 /* If maximizing it is worth using inline code for speed, doing the type
1732                  test once at the start (i.e. keep it out of the loop). */
1733                 
1734                 else {
1735                     stack.currentFrame->pp = stack.currentFrame->eptr;  /* Remember where we started */
1736                     
1737                     switch(stack.currentFrame->ctype) {
1738                         case OP_ANY:
1739                             
1740                             /* Special code is required for UTF8, but when the maximum is unlimited
1741                              we don't need it, so we repeat the non-UTF8 code. This is probably
1742                              worth it, because .* is quite a common idiom. */
1743                             
1744                             if (stack.currentFrame->max < INT_MAX) {
1745                                 for (i = min; i < stack.currentFrame->max; i++) {
1746                                     if (stack.currentFrame->eptr >= md->end_subject || isNewline(*stack.currentFrame->eptr))
1747                                         break;
1748                                     stack.currentFrame->eptr++;
1749                                     while (stack.currentFrame->eptr < md->end_subject && (*stack.currentFrame->eptr & 0xc0) == 0x80)
1750                                         stack.currentFrame->eptr++;
1751                                 }
1752                             }
1753                             
1754                             /* Handle unlimited UTF-8 repeat */
1755                             
1756                             else {
1757                                 for (i = min; i < stack.currentFrame->max; i++) {
1758                                     if (stack.currentFrame->eptr >= md->end_subject || isNewline(*stack.currentFrame->eptr))
1759                                         break;
1760                                     stack.currentFrame->eptr++;
1761                                 }
1762                                 break;
1763                             }
1764                             break;
1765                             
1766                             case OP_NOT_DIGIT:
1767                             for (i = min; i < stack.currentFrame->max; i++) {
1768                                 int len = 1;
1769                                 if (stack.currentFrame->eptr >= md->end_subject)
1770                                     break;
1771                                 GETCHARLEN(c, stack.currentFrame->eptr, len);
1772                                 if (isASCIIDigit(c))
1773                                     break;
1774                                 stack.currentFrame->eptr+= len;
1775                             }
1776                             break;
1777                             
1778                             case OP_DIGIT:
1779                             for (i = min; i < stack.currentFrame->max; i++) {
1780                                 int len = 1;
1781                                 if (stack.currentFrame->eptr >= md->end_subject)
1782                                     break;
1783                                 GETCHARLEN(c, stack.currentFrame->eptr, len);
1784                                 if (!isASCIIDigit(c))
1785                                     break;
1786                                 stack.currentFrame->eptr+= len;
1787                             }
1788                             break;
1789                             
1790                             case OP_NOT_WHITESPACE:
1791                             for (i = min; i < stack.currentFrame->max; i++) {
1792                                 int len = 1;
1793                                 if (stack.currentFrame->eptr >= md->end_subject)
1794                                     break;
1795                                 GETCHARLEN(c, stack.currentFrame->eptr, len);
1796                                 if (c < 128 && (md->ctypes[c] & ctype_space))
1797                                     break;
1798                                 stack.currentFrame->eptr+= len;
1799                             }
1800                             break;
1801                             
1802                             case OP_WHITESPACE:
1803                             for (i = min; i < stack.currentFrame->max; i++) {
1804                                 int len = 1;
1805                                 if (stack.currentFrame->eptr >= md->end_subject)
1806                                     break;
1807                                 GETCHARLEN(c, stack.currentFrame->eptr, len);
1808                                 if (c >= 128 || !(md->ctypes[c] & ctype_space))
1809                                     break;
1810                                 stack.currentFrame->eptr+= len;
1811                             }
1812                             break;
1813                             
1814                             case OP_NOT_WORDCHAR:
1815                             for (i = min; i < stack.currentFrame->max; i++) {
1816                                 int len = 1;
1817                                 if (stack.currentFrame->eptr >= md->end_subject)
1818                                     break;
1819                                 GETCHARLEN(c, stack.currentFrame->eptr, len);
1820                                 if (c < 128 && (md->ctypes[c] & ctype_word))
1821                                     break;
1822                                 stack.currentFrame->eptr+= len;
1823                             }
1824                             break;
1825                             
1826                             case OP_WORDCHAR:
1827                             for (i = min; i < stack.currentFrame->max; i++) {
1828                                 int len = 1;
1829                                 if (stack.currentFrame->eptr >= md->end_subject)
1830                                     break;
1831                                 GETCHARLEN(c, stack.currentFrame->eptr, len);
1832                                 if (c >= 128 || !(md->ctypes[c] & ctype_word))
1833                                     break;
1834                                 stack.currentFrame->eptr+= len;
1835                             }
1836                             break;
1837                             
1838                             default:
1839                             ASSERT_NOT_REACHED();
1840                             RRETURN_ERROR(JSRegExpErrorInternal);
1841                     }
1842                     
1843                     /* stack.currentFrame->eptr is now past the end of the maximum run */
1844                     
1845                     for (;;) {
1846                         RMATCH(52, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1847                         if (is_match)
1848                             RRETURN;
1849                         if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1850                             break;        /* Stop if tried at original pos */
1851                         BACKCHAR(stack.currentFrame->eptr);
1852                     }
1853                     
1854                     /* Get here if we can't make it match with any permitted repetitions */
1855                     
1856                     RRETURN;
1857                 }
1858                 ASSERT_NOT_REACHED();
1859                 
1860                 BEGIN_OPCODE(CRMINPLUS):
1861                 BEGIN_OPCODE(CRMINQUERY):
1862                 BEGIN_OPCODE(CRMINRANGE):
1863                 BEGIN_OPCODE(CRMINSTAR):
1864                 BEGIN_OPCODE(CRPLUS):
1865                 BEGIN_OPCODE(CRQUERY):
1866                 BEGIN_OPCODE(CRRANGE):
1867                 BEGIN_OPCODE(CRSTAR):
1868                 ASSERT_NOT_REACHED();
1869                 RRETURN_ERROR(JSRegExpErrorInternal);
1870                 
1871 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1872             CAPTURING_BRACKET:
1873 #else
1874                 default:
1875 #endif
1876                 /* Opening capturing bracket. If there is space in the offset vector, save
1877                  the current subject position in the working slot at the top of the vector. We
1878                  mustn't change the current values of the data slot, because they may be set
1879                  from a previous iteration of this group, and be referred to by a reference
1880                  inside the group.
1881                  
1882                  If the bracket fails to match, we need to restore this value and also the
1883                  values of the final offsets, in case they were set by a previous iteration of
1884                  the same bracket.
1885                  
1886                  If there isn't enough space in the offset vector, treat this as if it were a
1887                  non-capturing bracket. Don't worry about setting the flag for the error case
1888                  here; that is handled in the code for KET. */
1889                 
1890                 ASSERT(*stack.currentFrame->ecode > OP_BRA);
1891                 
1892                 stack.currentFrame->number = *stack.currentFrame->ecode - OP_BRA;
1893                 
1894                 /* For extended extraction brackets (large number), we have to fish out the
1895                  number from a dummy opcode at the start. */
1896                 
1897                 if (stack.currentFrame->number > EXTRACT_BASIC_MAX)
1898                     stack.currentFrame->number = GET2(stack.currentFrame->ecode, 2+LINK_SIZE);
1899                 stack.currentFrame->offset = stack.currentFrame->number << 1;
1900                 
1901 #ifdef DEBUG
1902                 printf("start bracket %d subject=", stack.currentFrame->number);
1903                 pchars(stack.currentFrame->eptr, 16, true, md);
1904                 printf("\n");
1905 #endif
1906                 
1907                 if (stack.currentFrame->offset < md->offset_max) {
1908                     stack.currentFrame->save_offset1 = md->offset_vector[stack.currentFrame->offset];
1909                     stack.currentFrame->save_offset2 = md->offset_vector[stack.currentFrame->offset + 1];
1910                     stack.currentFrame->save_offset3 = md->offset_vector[md->offset_end - stack.currentFrame->number];
1911                     
1912                     DPRINTF(("saving %d %d %d\n", stack.currentFrame->save_offset1, stack.currentFrame->save_offset2, stack.currentFrame->save_offset3));
1913                     md->offset_vector[md->offset_end - stack.currentFrame->number] = stack.currentFrame->eptr - md->start_subject;
1914                     
1915                     do {
1916                         RMATCH(1, stack.currentFrame->ecode + 1 + LINK_SIZE, stack.currentFrame->eptrb, match_isgroup);
1917                         if (is_match) RRETURN;
1918                         stack.currentFrame->ecode += GET(stack.currentFrame->ecode, 1);
1919                     } while (*stack.currentFrame->ecode == OP_ALT);
1920                     
1921                     DPRINTF(("bracket %d failed\n", stack.currentFrame->number));
1922                     
1923                     md->offset_vector[stack.currentFrame->offset] = stack.currentFrame->save_offset1;
1924                     md->offset_vector[stack.currentFrame->offset + 1] = stack.currentFrame->save_offset2;
1925                     md->offset_vector[md->offset_end - stack.currentFrame->number] = stack.currentFrame->save_offset3;
1926                     
1927                     RRETURN;
1928                 }
1929                 
1930                 /* Insufficient room for saving captured contents */
1931                 
1932                 goto NON_CAPTURING_BRACKET;
1933         }
1934         
1935         /* Do not stick any code in here without much thought; it is assumed
1936          that "continue" in the code above comes out to here to repeat the main
1937          loop. */
1938         
1939     } /* End of main loop */
1940     
1941     ASSERT_NOT_REACHED();
1942     
1943 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1944     
1945 RRETURN_SWITCH:
1946     switch (stack.currentFrame->where)
1947     {
1948         case 0: goto RETURN;
1949         case 1: goto RRETURN_1;
1950         case 2: goto RRETURN_2;
1951         case 6: goto RRETURN_6;
1952         case 7: goto RRETURN_7;
1953         case 9: goto RRETURN_9;
1954         case 10: goto RRETURN_10;
1955         case 11: goto RRETURN_11;
1956         case 12: goto RRETURN_12;
1957         case 13: goto RRETURN_13;
1958         case 14: goto RRETURN_14;
1959         case 15: goto RRETURN_15;
1960         case 16: goto RRETURN_16;
1961         case 17: goto RRETURN_17;
1962         case 18: goto RRETURN_18;
1963         case 19: goto RRETURN_19;
1964         case 20: goto RRETURN_20;
1965         case 21: goto RRETURN_21;
1966         case 22: goto RRETURN_22;
1967         case 24: goto RRETURN_24;
1968         case 26: goto RRETURN_26;
1969         case 27: goto RRETURN_27;
1970         case 28: goto RRETURN_28;
1971         case 29: goto RRETURN_29;
1972         case 30: goto RRETURN_30;
1973         case 31: goto RRETURN_31;
1974         case 38: goto RRETURN_38;
1975         case 40: goto RRETURN_40;
1976         case 42: goto RRETURN_42;
1977         case 44: goto RRETURN_44;
1978         case 48: goto RRETURN_48;
1979         case 52: goto RRETURN_52;
1980     }
1981     
1982     abort();
1983     RRETURN_ERROR(JSRegExpErrorInternal);
1984     
1985 #endif
1986     
1987 RETURN:
1988     return is_match ? MATCH_MATCH : MATCH_NOMATCH;
1989 }
1990
1991
1992 /*************************************************
1993 *         Execute a Regular Expression           *
1994 *************************************************/
1995
1996 /* This function applies a compiled re to a subject string and picks out
1997 portions of the string if it matches. Two elements in the vector are set for
1998 each substring: the offsets to the start and end of the substring.
1999
2000 Arguments:
2001   argument_re     points to the compiled expression
2002   extra_data      points to extra data or is NULL
2003   subject         points to the subject string
2004   length          length of subject string (may contain binary zeros)
2005   start_offset    where to start in the subject string
2006   options         option bits
2007   offsets         points to a vector of ints to be filled in with offsets
2008   offsetcount     the number of elements in the vector
2009
2010 Returns:          > 0 => success; value is the number of elements filled in
2011                   = 0 => success, but offsets is not big enough
2012                    -1 => failed to match
2013                  < -1 => some kind of unexpected problem
2014 */
2015
2016 int
2017 jsRegExpExecute(const pcre *argument_re,
2018   const UChar* subject, int length, int start_offset, int *offsets,
2019   int offsetcount)
2020 {
2021 int rc, resetcount, ocount;
2022 int first_byte = -1;
2023 int req_byte = -1;
2024 int req_byte2 = -1;
2025 BOOL using_temporary_offsets = false;
2026 BOOL first_byte_caseless = false;
2027 BOOL startline;
2028 BOOL req_byte_caseless = false;
2029 match_data match_block;
2030 USPTR start_match = (USPTR)subject + start_offset;
2031 USPTR end_subject;
2032 USPTR req_byte_ptr = start_match - 1;
2033 const uschar *start_code;
2034
2035 const real_pcre *external_re = (const real_pcre *)argument_re;
2036 const real_pcre *re = external_re;
2037
2038 /* Plausibility checks */
2039
2040 ASSERT(re);
2041 ASSERT(subject);
2042 ASSERT(offsetcount >= 0);
2043 ASSERT(offsets || offsetcount == 0);
2044
2045 /* Set up other data */
2046
2047 startline = (re->options & PCRE_STARTLINE) != 0;
2048
2049 /* The code starts after the real_pcre block and the capture name table. */
2050
2051 start_code = (const uschar *)(external_re + 1);
2052
2053 match_block.start_subject = (USPTR)subject;
2054 match_block.end_subject = match_block.start_subject + length;
2055 end_subject = match_block.end_subject;
2056
2057 match_block.lcc = _pcre_default_tables + lcc_offset;
2058 match_block.ctypes = _pcre_default_tables + ctypes_offset;
2059
2060 match_block.multiline = (re->options & PCRE_MULTILINE) != 0;
2061 match_block.caseless = (re->options & PCRE_CASELESS) != 0;
2062
2063 /* If the expression has got more back references than the offsets supplied can
2064 hold, we get a temporary chunk of working store to use during the matching.
2065 Otherwise, we can use the vector supplied, rounding down its size to a multiple
2066 of 3. */
2067
2068 ocount = offsetcount - (offsetcount % 3);
2069
2070 if (re->top_backref > 0 && re->top_backref >= ocount/3)
2071   {
2072   ocount = re->top_backref * 3 + 3;
2073   match_block.offset_vector = new int[ocount];
2074   if (match_block.offset_vector == NULL) return JSRegExpErrorNoMemory;
2075   using_temporary_offsets = true;
2076   DPRINTF(("Got memory to hold back references\n"));
2077   }
2078 else match_block.offset_vector = offsets;
2079
2080 match_block.offset_end = ocount;
2081 match_block.offset_max = (2*ocount)/3;
2082 match_block.offset_overflow = false;
2083
2084 /* Compute the minimum number of offsets that we need to reset each time. Doing
2085 this makes a huge difference to execution time when there aren't many brackets
2086 in the pattern. */
2087
2088 resetcount = 2 + re->top_bracket * 2;
2089 if (resetcount > offsetcount) resetcount = ocount;
2090
2091 /* Reset the working variable associated with each extraction. These should
2092 never be used unless previously set, but they get saved and restored, and so we
2093 initialize them to avoid reading uninitialized locations. */
2094
2095 if (match_block.offset_vector != NULL)
2096   {
2097   register int *iptr = match_block.offset_vector + ocount;
2098   register int *iend = iptr - resetcount/2 + 1;
2099   while (--iptr >= iend) *iptr = -1;
2100   }
2101
2102 /* Set up the first character to match, if available. The first_byte value is
2103 never set for an anchored regular expression, but the anchoring may be forced
2104 at run time, so we have to test for anchoring. The first char may be unset for
2105 an unanchored pattern, of course. If there's no first char and the pattern was
2106 studied, there may be a bitmap of possible first characters. */
2107
2108   if ((re->options & PCRE_FIRSTSET) != 0)
2109     {
2110     first_byte = re->first_byte & 255;
2111     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == true)
2112       first_byte = match_block.lcc[first_byte];
2113     }
2114
2115 /* For anchored or unanchored matches, there may be a "last known required
2116 character" set. */
2117
2118 if ((re->options & PCRE_REQCHSET) != 0)
2119   {
2120   req_byte = re->req_byte & 255;
2121   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2122   req_byte2 = (_pcre_default_tables + fcc_offset)[req_byte];  /* case flipped */
2123   }
2124
2125 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
2126 the loop runs just once. */
2127
2128 do
2129   {
2130   USPTR save_end_subject = end_subject;
2131
2132   /* Reset the maximum number of extractions we might see. */
2133
2134   if (match_block.offset_vector != NULL)
2135     {
2136     register int *iptr = match_block.offset_vector;
2137     register int *iend = iptr + resetcount;
2138     while (iptr < iend) *iptr++ = -1;
2139     }
2140
2141   /* Advance to a unique first char if possible. If firstline is true, the
2142   start of the match is constrained to the first line of a multiline string.
2143   Implement this by temporarily adjusting end_subject so that we stop scanning
2144   at a newline. If the match fails at the newline, later code breaks this loop.
2145   */
2146
2147   /* Now test for a unique first byte */
2148
2149   if (first_byte >= 0)
2150     {
2151     pcre_uchar first_char = first_byte;
2152     if (first_byte_caseless)
2153       while (start_match < end_subject)
2154         {
2155         int sm = *start_match;
2156         if (sm > 127)
2157           break;
2158         if (match_block.lcc[sm] == first_char)
2159           break;
2160         start_match++;
2161         }
2162     else
2163       while (start_match < end_subject && *start_match != first_char)
2164         start_match++;
2165     }
2166
2167   /* Or to just after \n for a multiline match if possible */
2168
2169   else if (startline)
2170     {
2171     if (start_match > match_block.start_subject + start_offset)
2172       {
2173       while (start_match < end_subject && !isNewline(start_match[-1]))
2174         start_match++;
2175       }
2176     }
2177
2178   /* Restore fudged end_subject */
2179
2180   end_subject = save_end_subject;
2181
2182 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
2183   printf(">>>> Match against: ");
2184   pchars(start_match, end_subject - start_match, true, &match_block);
2185   printf("\n");
2186 #endif
2187
2188   /* If req_byte is set, we know that that character must appear in the subject
2189   for the match to succeed. If the first character is set, req_byte must be
2190   later in the subject; otherwise the test starts at the match point. This
2191   optimization can save a huge amount of backtracking in patterns with nested
2192   unlimited repeats that aren't going to match. Writing separate code for
2193   cased/caseless versions makes it go faster, as does using an autoincrement
2194   and backing off on a match.
2195
2196   HOWEVER: when the subject string is very, very long, searching to its end can
2197   take a long time, and give bad performance on quite ordinary patterns. This
2198   showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2199   don't do this when the string is sufficiently long.
2200
2201   ALSO: this processing is disabled when partial matching is requested.
2202   */
2203
2204   if (req_byte >= 0 &&
2205       end_subject - start_match < REQ_BYTE_MAX)
2206     {
2207     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
2208
2209     /* We don't need to repeat the search if we haven't yet reached the
2210     place we found it at last time. */
2211
2212     if (p > req_byte_ptr)
2213       {
2214       if (req_byte_caseless)
2215         {
2216         while (p < end_subject)
2217           {
2218           register int pp = *p++;
2219           if (pp == req_byte || pp == req_byte2) { p--; break; }
2220           }
2221         }
2222       else
2223         {
2224         while (p < end_subject)
2225           {
2226           if (*p++ == req_byte) { p--; break; }
2227           }
2228         }
2229
2230       /* If we can't find the required character, break the matching loop */
2231
2232       if (p >= end_subject) break;
2233
2234       /* If we have found the required character, save the point where we
2235       found it, so that we don't search again next time round the loop if
2236       the start hasn't passed this character yet. */
2237
2238       req_byte_ptr = p;
2239       }
2240     }
2241
2242   /* When a match occurs, substrings will be set for all internal extractions;
2243   we just need to set up the whole thing as substring 0 before returning. If
2244   there were too many extractions, set the return code to zero. In the case
2245   where we had to get some local store to hold offsets for backreferences, copy
2246   those back references that we can. In this case there need not be overflow
2247   if certain parts of the pattern were not used. */
2248
2249   match_block.match_call_count = 0;
2250
2251   rc = match(start_match, start_code, 2, &match_block);
2252
2253   /* When the result is no match, if the subject's first character was a
2254   newline and the PCRE_FIRSTLINE option is set, break (which will return
2255   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first
2256   newline in the subject. Otherwise, advance the pointer to the next character
2257   and continue - but the continuation will actually happen only when the
2258   pattern is not anchored. */
2259
2260   if (rc == MATCH_NOMATCH)
2261     {
2262     start_match++;
2263       while(start_match < end_subject && ISMIDCHAR(*start_match))
2264         start_match++;
2265     continue;
2266     }
2267
2268   if (rc != MATCH_MATCH)
2269     {
2270     DPRINTF((">>>> error: returning %d\n", rc));
2271     return rc;
2272     }
2273
2274   /* We have a match! Copy the offset information from temporary store if
2275   necessary */
2276
2277   if (using_temporary_offsets)
2278     {
2279     if (offsetcount >= 4)
2280       {
2281       memcpy(offsets + 2, match_block.offset_vector + 2,
2282         (offsetcount - 2) * sizeof(int));
2283       DPRINTF(("Copied offsets from temporary memory\n"));
2284       }
2285     if (match_block.end_offset_top > offsetcount)
2286       match_block.offset_overflow = true;
2287
2288     DPRINTF(("Freeing temporary memory\n"));
2289     delete [] match_block.offset_vector;
2290     }
2291
2292   rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
2293
2294   if (offsetcount < 2) rc = 0; else
2295     {
2296     offsets[0] = start_match - match_block.start_subject;
2297     offsets[1] = match_block.end_match_ptr - match_block.start_subject;
2298     }
2299
2300   DPRINTF((">>>> returning %d\n", rc));
2301   return rc;
2302   }
2303
2304 /* This "while" is the end of the "do" above */
2305
2306 while (start_match <= end_subject);
2307
2308 if (using_temporary_offsets)
2309   {
2310   DPRINTF(("Freeing temporary memory\n"));
2311   delete [] match_block.offset_vector;
2312   }
2313
2314   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2315   return JSRegExpErrorNoMatch;
2316 }