1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
7 Originally written by Philip Hazel
8 Copyright (c) 1997-2006 University of Cambridge
9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
40 /* This module contains jsRegExpExecute(), the externally visible function
41 that does pattern matching using an NFA algorithm, following the rules from
42 the JavaScript specification. There are also some supporting functions. */
46 #include "pcre_internal.h"
48 #include <wtf/ASCIICType.h>
49 #include <wtf/Vector.h>
54 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
55 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
58 /* Avoid warnings on Windows. */
62 /* Structure for building a chain of data that actually lives on the
63 stack, for holding the values of the subject pointer at the start of each
64 subpattern, so as to detect when an empty string has been matched by a
65 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks
66 are on the heap, not on the stack. */
68 typedef struct eptrblock {
69 struct eptrblock *epb_prev;
73 /* Structure for remembering the local variables in a private frame */
75 typedef struct matchframe {
76 /* Where to jump back to */
77 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
83 struct matchframe *prevframe;
85 /* Function arguments that may change */
87 const pcre_uchar *eptr;
92 /* Function local variables */
98 const pcre_uchar *saved_eptr;
100 int repeat_othercase;
109 int save_offset1, save_offset2, save_offset3;
114 /* Structure for passing "static" information around between the functions
115 doing traditional NFA matching, so that they are thread-safe. */
117 typedef struct match_data {
118 unsigned long int match_call_count; /* As it says */
119 int *offset_vector; /* Offset vector */
120 int offset_end; /* One past the end */
121 int offset_max; /* The maximum usable for return data */
122 const uschar *lcc; /* Points to lower casing table */
123 const uschar *ctypes; /* Points to table of type maps */
124 BOOL offset_overflow; /* Set if too many extractions */
125 USPTR start_subject; /* Start of the subject string */
126 USPTR end_subject; /* End of the subject string */
127 USPTR end_match_ptr; /* Subject position at end match */
128 int end_offset_top; /* Highwater mark at end of match */
133 #define match_isgroup true /* Set if start of bracketed group */
135 /* Non-error returns from the match() function. Error returns are externally
136 defined PCRE_ERROR_xxx codes, which are all negative. */
138 #define MATCH_MATCH 1
139 #define MATCH_NOMATCH 0
141 /* Min and max values for the common repeats; for the maxima, 0 => infinity.
Both tables are indexed by (repeat opcode - OP_CRSTAR), which is how match()
picks up its repeat limits. Odd indices are the minimizing variants: match()
derives its "minimize" flag from the low bit of the same index. A zero maximum
read from rep_max is replaced by INT_MAX by the code that consumes it. */
143 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
144 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
149 /*************************************************
150 * Debugging function to print chars *
151 *************************************************/
153 /* Print a sequence of chars in printable format, stopping at the end of the
154 subject if requested.
157 p points to characters
158 length number to print
159 is_subject true if printing from within md->start_subject
160 md pointer to matching data block, if is_subject is true
166 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
169 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
171 if (isprint(c = *(p++))) printf("%c", c);
172 else if (c < 256) printf("\\x%02x", c);
173 else printf("\\x{%x}", c);
179 /*************************************************
180 * Match a back-reference *
181 *************************************************/
183 /* Always fails if the length that is passed is greater than the number of
184 characters left in the subject. The caller in match() passes a length of zero
for an unset back reference, so no characters are compared in that case.
187 offset index into the offset vector
188 eptr points into the subject
189 length length to be matched
190 md points to match data block
192 Returns: true if matched
196 match_ref(int offset, register USPTR eptr, int length, match_data *md)
198 USPTR p = md->start_subject + md->offset_vector[offset];
201 if (eptr >= md->end_subject)
202 printf("matching subject <null>");
205 printf("matching subject ");
206 pchars(eptr, length, true, md);
208 printf(" against backref ");
209 pchars(p, length, false, md);
213 /* Always fail if not enough characters left */
215 if (length > md->end_subject - eptr) return false;
217 /* Separate the caseless case for speed */
224 int othercase = _pcre_ucp_othercase(c);
225 pcre_uchar d = *eptr++;
226 if (c != d && othercase != d) return false;
230 { while (length-- > 0) if (*p++ != *eptr++) return false; }
237 /***************************************************************************
238 ****************************************************************************
239 RECURSION IN THE match() FUNCTION
241 The original match() function was highly recursive. The current version
242 still has the remnants of the original in that recursive processing of the
243 regular expression is triggered by invoking a macro named RMATCH. This is
244 no longer really much like a recursive call to match() itself.
245 ****************************************************************************
246 ***************************************************************************/
248 /* These versions of the macros use the stack, as normal. There are debugging
249 versions and production versions. */
251 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
253 /* Use numbered labels and switch statement at the bottom of the match function. */
255 #define RMATCH_WHERE(num) num
256 #define RRETURN_LABEL RRETURN_SWITCH
260 /* Use GCC's computed goto extension. */
262 /* For one test case this is more than 40% faster than the switch statement.
263 We could avoid the use of the num argument entirely by using local labels,
264 but using it for the GCC case as well as the non-GCC case allows us to share
265 a bit more code and notice if we use conflicting numbers.*/
267 #define RMATCH_WHERE(num) &&RRETURN_##num
268 #define RRETURN_LABEL *stack.currentFrame->where
272 #define RMATCH(num, ra, rb, rc)\
274 if (stack.currentFrame >= stack.frames && stack.currentFrame + 1 < stack.framesEnd)\
275 newframe = stack.currentFrame + 1;\
277 newframe = new matchframe;\
278 newframe->eptr = stack.currentFrame->eptr;\
279 newframe->ecode = (ra);\
280 newframe->offset_top = stack.currentFrame->offset_top;\
281 newframe->eptrb = (rb);\
282 is_group_start = (rc);\
284 newframe->prevframe = stack.currentFrame;\
285 stack.currentFrame = newframe;\
286 stack.currentFrame->where = RMATCH_WHERE(num);\
287 DPRINTF(("restarting from line %d\n", __LINE__));\
290 newframe = stack.currentFrame;\
291 stack.currentFrame = stack.currentFrame->prevframe;\
292 if (!(newframe >= stack.frames && newframe < stack.framesEnd))\
295 DPRINTF(("did a goto back to line %d\n", __LINE__));\
298 #define RRETURN goto RRETURN_LABEL
300 #define RRETURN_NO_MATCH \
306 #define RRETURN_ERROR(error) \
307 return matchError(error, stack);
309 /*************************************************
310 * Match from current position *
311 *************************************************/
313 /* On entry ecode points to the first opcode, and eptr to the first character
314 in the subject string, while eptrb holds the value of eptr at the start of the
315 last bracketed group - used for breaking infinite loops matching zero-length
316 strings. This function is called recursively in many circumstances. Whenever it
317 returns a negative (error) response, the outer incarnation must also return the
321 eptr pointer in subject
322 ecode position in code
323 offset_top current top pointer
324 md pointer to "static" info for the match
326 Returns: MATCH_MATCH if matched ) these values are >= 0
327 MATCH_NOMATCH if failed to match )
328 a negative PCRE_ERROR_xxx value if aborted by an error condition
329 (e.g. stopped by repeated call or recursion limit)
335 framesEnd = frames + sizeof(frames) / sizeof(frames[0]);
336 currentFrame = frames;
338 matchframe frames[16];
339 matchframe* framesEnd;
340 matchframe* currentFrame;
342 void unrollAnyHeapAllocatedFrames()
344 while (!(currentFrame >= frames && currentFrame < framesEnd)) {
345 matchframe* parentFrame = currentFrame->prevframe;
347 currentFrame = parentFrame;
352 static int matchError(int errorCode, MatchStack& stack)
354 stack.unrollAnyHeapAllocatedFrames();
358 static int match(USPTR eptr, const uschar* ecode, int offset_top, match_data* md)
360 register int is_match = false;
368 BOOL is_group_start = true;
370 BOOL minimize = false; /* Initialization not really needed, but some compilers think so. */
372 /* The value 16 here is large enough that most regular expressions don't require
373 any heap allocation of match frames, yet the amount of stack used for the array is
374 modest enough that we don't run out of stack. */
376 matchframe* newframe;
378 /* The opcode jump table. */
379 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
380 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
381 static void* opcode_jump_table[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
382 #undef EMIT_JUMP_TABLE_ENTRY
385 /* One-time setup of the opcode jump table. */
386 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
388 while (!opcode_jump_table[i])
389 opcode_jump_table[i--] = &&CAPTURING_BRACKET;
392 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
393 stack.currentFrame->where = &&RETURN;
395 stack.currentFrame->where = 0;
398 stack.currentFrame->eptr = eptr;
399 stack.currentFrame->ecode = ecode;
400 stack.currentFrame->offset_top = offset_top;
401 stack.currentFrame->eptrb = NULL;
403 /* This is where control jumps back to in order to effect "recursion" */
407 /* OK, now we can get on with the real code of the function. Recursive calls
408 are specified by the macro RMATCH and RRETURN is used to return. When
409 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
410 and a "return", respectively (possibly with some debugging if DEBUG is
411 defined). However, RMATCH isn't like a function call because it's quite a
412 complicated macro. It has to be used in one particular way. This shouldn't,
413 however, impact performance when true recursion is being used. */
415 /* First check that we haven't called match() too many times, or that we
416 haven't exceeded the recursive call limit. */
418 if (md->match_call_count++ >= MATCH_LIMIT)
419 RRETURN_ERROR(JSRegExpErrorMatchLimit);
420 if (rdepth >= MATCH_LIMIT_RECURSION)
421 RRETURN_ERROR(JSRegExpErrorRecursionLimit);
423 /* At the start of a bracketed group, add the current subject pointer to the
424 stack of such pointers, to be re-instated at the end of the group when we hit
425 the closing ket. When match() is called in other circumstances, we don't add to
428 if (is_group_start) {
429 stack.currentFrame->newptrb.epb_prev = stack.currentFrame->eptrb;
430 stack.currentFrame->newptrb.epb_saved_eptr = stack.currentFrame->eptr;
431 stack.currentFrame->eptrb = &stack.currentFrame->newptrb;
434 /* Now start processing the operations. */
436 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
441 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
442 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
443 #define NEXT_OPCODE goto *opcode_jump_table[*stack.currentFrame->ecode]
445 #define BEGIN_OPCODE(opcode) case OP_##opcode
446 #define NEXT_OPCODE continue
449 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
452 switch (*stack.currentFrame->ecode)
455 /* Non-capturing bracket: optimized */
458 NON_CAPTURING_BRACKET:
459 DPRINTF(("start bracket 0\n"));
461 RMATCH(2, stack.currentFrame->ecode + 1 + LINK_SIZE, stack.currentFrame->eptrb, match_isgroup);
464 stack.currentFrame->ecode += GET(stack.currentFrame->ecode, 1);
465 } while (*stack.currentFrame->ecode == OP_ALT);
466 DPRINTF(("bracket 0 failed\n"));
469 /* Skip over large extraction number data if encountered. */
471 BEGIN_OPCODE(BRANUMBER):
472 stack.currentFrame->ecode += 3;
475 /* End of the pattern. */
478 md->end_match_ptr = stack.currentFrame->eptr; /* Record where we ended */
479 md->end_offset_top = stack.currentFrame->offset_top; /* and how many extracts were taken */
483 /* Assertion brackets. Check the alternative branches in turn - the
484 matching won't pass the KET for an assertion. If any one branch matches,
485 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
486 start of each branch to move the current point backwards, so the code at
487 this level is identical to the lookahead case. */
489 BEGIN_OPCODE(ASSERT):
491 RMATCH(6, stack.currentFrame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
493 stack.currentFrame->ecode += GET(stack.currentFrame->ecode, 1);
494 } while (*stack.currentFrame->ecode == OP_ALT);
495 if (*stack.currentFrame->ecode == OP_KET)
498 /* Continue from after the assertion, updating the offsets high water
499 mark, since extracts may have been taken during the assertion. */
501 do stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1); while (*stack.currentFrame->ecode == OP_ALT);
502 stack.currentFrame->ecode += 1 + LINK_SIZE;
503 stack.currentFrame->offset_top = md->end_offset_top;
506 /* Negative assertion: all branches must fail to match */
508 BEGIN_OPCODE(ASSERT_NOT):
510 RMATCH(7, stack.currentFrame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
513 stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1);
514 } while (*stack.currentFrame->ecode == OP_ALT);
516 stack.currentFrame->ecode += 1 + LINK_SIZE;
519 /* "Once" brackets are like assertion brackets except that after a match,
520 the point in the subject string is not moved back. Thus there can never be
521 a move back into the brackets. Friedl calls these "atomic" subpatterns.
522 Check the alternative branches in turn - the matching won't pass the KET
523 for this kind of subpattern. If any one branch matches, we carry on as at
524 the end of a normal bracket, leaving the subject pointer. */
527 stack.currentFrame->prev = stack.currentFrame->ecode;
528 stack.currentFrame->saved_eptr = stack.currentFrame->eptr;
531 RMATCH(9, stack.currentFrame->ecode + 1 + LINK_SIZE, stack.currentFrame->eptrb, match_isgroup);
534 stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1);
535 } while (*stack.currentFrame->ecode == OP_ALT);
537 /* If hit the end of the group (which could be repeated), fail */
539 if (*stack.currentFrame->ecode != OP_ONCE && *stack.currentFrame->ecode != OP_ALT)
542 /* Continue as from after the assertion, updating the offsets high water
543 mark, since extracts may have been taken. */
545 do stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1); while (*stack.currentFrame->ecode == OP_ALT);
547 stack.currentFrame->offset_top = md->end_offset_top;
548 stack.currentFrame->eptr = md->end_match_ptr;
550 /* For a non-repeating ket, just continue at this level. This also
551 happens for a repeating ket if no characters were matched in the group.
552 This is the forcible breaking of infinite loops as implemented in Perl
553 5.005. If there is an options reset, it will get obeyed in the normal
556 if (*stack.currentFrame->ecode == OP_KET || stack.currentFrame->eptr == stack.currentFrame->saved_eptr) {
557 stack.currentFrame->ecode += 1+LINK_SIZE;
561 /* The repeating kets try the rest of the pattern or restart from the
562 preceding bracket, in the appropriate order. We need to reset any options
563 that changed within the bracket before re-running it, so check the next
566 if (*stack.currentFrame->ecode == OP_KETRMIN) {
567 RMATCH(10, stack.currentFrame->ecode + 1 + LINK_SIZE, stack.currentFrame->eptrb, 0);
570 RMATCH(11, stack.currentFrame->prev, stack.currentFrame->eptrb, match_isgroup);
573 } else { /* OP_KETRMAX */
574 RMATCH(12, stack.currentFrame->prev, stack.currentFrame->eptrb, match_isgroup);
577 RMATCH(13, stack.currentFrame->ecode + 1+LINK_SIZE, stack.currentFrame->eptrb, 0);
583 /* An alternation is the end of a branch; scan along to find the end of the
584 bracketed group and go to there. */
587 do stack.currentFrame->ecode += GET(stack.currentFrame->ecode,1); while (*stack.currentFrame->ecode == OP_ALT);
590 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
591 that it may occur zero times. It may repeat infinitely, or not at all -
592 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
593 repeat limits are compiled as a number of copies, with the optional ones
594 preceded by BRAZERO or BRAMINZERO. */
596 BEGIN_OPCODE(BRAZERO):
598 stack.currentFrame->next = stack.currentFrame->ecode+1;
599 RMATCH(14, stack.currentFrame->next, stack.currentFrame->eptrb, match_isgroup);
602 do stack.currentFrame->next += GET(stack.currentFrame->next,1); while (*stack.currentFrame->next == OP_ALT);
603 stack.currentFrame->ecode = stack.currentFrame->next + 1+LINK_SIZE;
607 BEGIN_OPCODE(BRAMINZERO):
609 stack.currentFrame->next = stack.currentFrame->ecode+1;
610 do stack.currentFrame->next += GET(stack.currentFrame->next,1); while (*stack.currentFrame->next == OP_ALT);
611 RMATCH(15, stack.currentFrame->next + 1+LINK_SIZE, stack.currentFrame->eptrb, match_isgroup);
614 stack.currentFrame->ecode++;
618 /* End of a group, repeated or non-repeating. If we are at the end of
619 an assertion "group", stop matching and return MATCH_MATCH, but record the
620 current high water mark for use by positive assertions. Do this also
621 for the "once" (no backing up) groups. */
624 BEGIN_OPCODE(KETRMIN):
625 BEGIN_OPCODE(KETRMAX):
626 stack.currentFrame->prev = stack.currentFrame->ecode - GET(stack.currentFrame->ecode, 1);
627 stack.currentFrame->saved_eptr = stack.currentFrame->eptrb->epb_saved_eptr;
629 /* Back up the stack of bracket start pointers. */
631 stack.currentFrame->eptrb = stack.currentFrame->eptrb->epb_prev;
633 if (*stack.currentFrame->prev == OP_ASSERT || *stack.currentFrame->prev == OP_ASSERT_NOT || *stack.currentFrame->prev == OP_ONCE) {
634 md->end_match_ptr = stack.currentFrame->eptr; /* For ONCE */
635 md->end_offset_top = stack.currentFrame->offset_top;
640 /* In all other cases except a conditional group we have to check the
641 group number back at the start and if necessary complete handling an
642 extraction by setting the offsets and bumping the high water mark. */
644 stack.currentFrame->number = *stack.currentFrame->prev - OP_BRA;
646 /* For extended extraction brackets (large number), we have to fish out
647 the number from a dummy opcode at the start. */
649 if (stack.currentFrame->number > EXTRACT_BASIC_MAX)
650 stack.currentFrame->number = GET2(stack.currentFrame->prev, 2+LINK_SIZE);
651 stack.currentFrame->offset = stack.currentFrame->number << 1;
654 printf("end bracket %d", stack.currentFrame->number);
658 /* Test for a numbered group. This includes groups called as a result
659 of recursion. Note that whole-pattern recursion is coded as a recurse
660 into group 0, so it won't be picked up here. Instead, we catch it when
661 the OP_END is reached. */
663 if (stack.currentFrame->number > 0) {
664 if (stack.currentFrame->offset >= md->offset_max)
665 md->offset_overflow = true;
667 md->offset_vector[stack.currentFrame->offset] =
668 md->offset_vector[md->offset_end - stack.currentFrame->number];
669 md->offset_vector[stack.currentFrame->offset+1] = stack.currentFrame->eptr - md->start_subject;
670 if (stack.currentFrame->offset_top <= stack.currentFrame->offset)
671 stack.currentFrame->offset_top = stack.currentFrame->offset + 2;
675 /* For a non-repeating ket, just continue at this level. This also
676 happens for a repeating ket if no characters were matched in the group.
677 This is the forcible breaking of infinite loops as implemented in Perl
678 5.005. If there is an options reset, it will get obeyed in the normal
681 if (*stack.currentFrame->ecode == OP_KET || stack.currentFrame->eptr == stack.currentFrame->saved_eptr) {
682 stack.currentFrame->ecode += 1 + LINK_SIZE;
686 /* The repeating kets try the rest of the pattern or restart from the
687 preceding bracket, in the appropriate order. */
689 if (*stack.currentFrame->ecode == OP_KETRMIN) {
690 RMATCH(16, stack.currentFrame->ecode + 1+LINK_SIZE, stack.currentFrame->eptrb, 0);
693 RMATCH(17, stack.currentFrame->prev, stack.currentFrame->eptrb, match_isgroup);
696 } else { /* OP_KETRMAX */
697 RMATCH(18, stack.currentFrame->prev, stack.currentFrame->eptrb, match_isgroup);
700 RMATCH(19, stack.currentFrame->ecode + 1+LINK_SIZE, stack.currentFrame->eptrb, 0);
706 /* Start of subject, or after internal newline if multiline. */
709 if (stack.currentFrame->eptr != md->start_subject && (!md->multiline || !isNewline(stack.currentFrame->eptr[-1])))
711 stack.currentFrame->ecode++;
714 /* End of subject, or before internal newline if multiline. */
717 if (stack.currentFrame->eptr < md->end_subject && (!md->multiline || !isNewline(*stack.currentFrame->eptr)))
719 stack.currentFrame->ecode++;
722 /* Word boundary assertions */
724 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
725 BEGIN_OPCODE(WORD_BOUNDARY):
726 /* Find out if the previous and current characters are "word" characters.
727 It takes a bit more work in UTF-8 mode. Characters >= 128 are assumed to
728 be "non-word" characters. */
730 if (stack.currentFrame->eptr == md->start_subject)
731 prev_is_word = false;
733 const pcre_uchar *lastptr = stack.currentFrame->eptr - 1;
734 while(ISMIDCHAR(*lastptr))
737 prev_is_word = c < 128 && (md->ctypes[c] & ctype_word) != 0;
739 if (stack.currentFrame->eptr >= md->end_subject)
742 GETCHAR(c, stack.currentFrame->eptr);
743 cur_is_word = c < 128 && (md->ctypes[c] & ctype_word) != 0;
746 /* Now see if the situation is what we want */
748 if ((*stack.currentFrame->ecode++ == OP_WORD_BOUNDARY) ? cur_is_word == prev_is_word : cur_is_word != prev_is_word)
752 /* Match a single character type; inline for speed */
755 if (stack.currentFrame->eptr < md->end_subject && isNewline(*stack.currentFrame->eptr))
757 if (stack.currentFrame->eptr++ >= md->end_subject)
759 while (stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr))
760 stack.currentFrame->eptr++;
761 stack.currentFrame->ecode++;
764 BEGIN_OPCODE(NOT_DIGIT):
765 if (stack.currentFrame->eptr >= md->end_subject)
767 GETCHARINCTEST(c, stack.currentFrame->eptr);
770 stack.currentFrame->ecode++;
774 if (stack.currentFrame->eptr >= md->end_subject)
776 GETCHARINCTEST(c, stack.currentFrame->eptr);
777 if (!isASCIIDigit(c))
779 stack.currentFrame->ecode++;
782 BEGIN_OPCODE(NOT_WHITESPACE):
783 if (stack.currentFrame->eptr >= md->end_subject)
785 GETCHARINCTEST(c, stack.currentFrame->eptr);
786 if (c < 128 && (md->ctypes[c] & ctype_space))
788 stack.currentFrame->ecode++;
791 BEGIN_OPCODE(WHITESPACE):
792 if (stack.currentFrame->eptr >= md->end_subject)
794 GETCHARINCTEST(c, stack.currentFrame->eptr);
795 if (c >= 128 || !(md->ctypes[c] & ctype_space))
797 stack.currentFrame->ecode++;
800 BEGIN_OPCODE(NOT_WORDCHAR):
801 if (stack.currentFrame->eptr >= md->end_subject)
803 GETCHARINCTEST(c, stack.currentFrame->eptr);
804 if (c < 128 && (md->ctypes[c] & ctype_word))
806 stack.currentFrame->ecode++;
809 BEGIN_OPCODE(WORDCHAR):
810 if (stack.currentFrame->eptr >= md->end_subject)
812 GETCHARINCTEST(c, stack.currentFrame->eptr);
813 if (c >= 128 || !(md->ctypes[c] & ctype_word))
815 stack.currentFrame->ecode++;
818 /* Match a back reference, possibly repeatedly. Look past the end of the
819 item to see if there is repeat information following. The code is similar
820 to that for character classes, but repeated for efficiency. Then obey
821 similar code to character type repeats - written out again for speed.
822 However, if the referenced string is the empty string, always treat
823 it as matched, any number of times (otherwise there could be infinite
827 stack.currentFrame->offset = GET2(stack.currentFrame->ecode, 1) << 1; /* Doubled ref number */
828 stack.currentFrame->ecode += 3; /* Advance past item */
830 /* If the reference is unset, set the length to zero so that the reference is
831 treated as an empty string instead of forcing every attempt to fail. We
832 can't just fail here, because of the possibility of quantifiers with zero
835 if (stack.currentFrame->offset >= stack.currentFrame->offset_top || md->offset_vector[stack.currentFrame->offset] < 0)
836 stack.currentFrame->length = 0;
838 stack.currentFrame->length = md->offset_vector[stack.currentFrame->offset+1] - md->offset_vector[stack.currentFrame->offset];
840 /* Set up for repetition, or handle the non-repeated case */
842 switch (*stack.currentFrame->ecode) {
849 c = *stack.currentFrame->ecode++ - OP_CRSTAR;
850 minimize = (c & 1) != 0;
851 min = rep_min[c]; /* Pick up values from tables; */
852 stack.currentFrame->max = rep_max[c]; /* zero for max => infinity */
853 if (stack.currentFrame->max == 0)
854 stack.currentFrame->max = INT_MAX;
859 minimize = (*stack.currentFrame->ecode == OP_CRMINRANGE);
860 min = GET2(stack.currentFrame->ecode, 1);
861 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 3);
862 if (stack.currentFrame->max == 0)
863 stack.currentFrame->max = INT_MAX;
864 stack.currentFrame->ecode += 5;
867 default: /* No repeat follows */
868 if (!match_ref(stack.currentFrame->offset, stack.currentFrame->eptr, stack.currentFrame->length, md))
870 stack.currentFrame->eptr += stack.currentFrame->length;
874 /* If the length of the reference is zero, just continue with the
877 if (stack.currentFrame->length == 0)
880 /* First, ensure the minimum number of matches are present. */
882 for (i = 1; i <= min; i++) {
883 if (!match_ref(stack.currentFrame->offset, stack.currentFrame->eptr, stack.currentFrame->length, md))
885 stack.currentFrame->eptr += stack.currentFrame->length;
888 /* If min = max, continue at the same level without recursion.
889 They are not both allowed to be zero. */
891 if (min == stack.currentFrame->max)
894 /* If minimizing, keep trying and advancing the pointer */
897 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
898 RMATCH(20, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
901 if (stack.currentFrame->fi >= stack.currentFrame->max || !match_ref(stack.currentFrame->offset, stack.currentFrame->eptr, stack.currentFrame->length, md))
903 stack.currentFrame->eptr += stack.currentFrame->length;
905 ASSERT_NOT_REACHED();
908 /* If maximizing, find the longest string and work backwards */
911 stack.currentFrame->pp = stack.currentFrame->eptr;
912 for (i = min; i < stack.currentFrame->max; i++) {
913 if (!match_ref(stack.currentFrame->offset, stack.currentFrame->eptr, stack.currentFrame->length, md))
915 stack.currentFrame->eptr += stack.currentFrame->length;
917 while (stack.currentFrame->eptr >= stack.currentFrame->pp) {
918 RMATCH(21, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
921 stack.currentFrame->eptr -= stack.currentFrame->length;
925 ASSERT_NOT_REACHED();
927 /* Match a bit-mapped character class, possibly repeatedly. This op code is
928 used when all the characters in the class have values in the range 0-255,
929 and either the matching is caseful, or the characters are in the range
930 0-127 when UTF-8 processing is enabled. The only difference between
931 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
934 First, look past the end of the item to see if there is repeat information
935 following. Then obey similar code to character type repeats - written out
938 BEGIN_OPCODE(NCLASS):
940 stack.currentFrame->data = stack.currentFrame->ecode + 1; /* Save for matching */
941 stack.currentFrame->ecode += 33; /* Advance past the item */
943 switch (*stack.currentFrame->ecode) {
950 c = *stack.currentFrame->ecode++ - OP_CRSTAR;
951 minimize = (c & 1) != 0;
952 min = rep_min[c]; /* Pick up values from tables; */
953 stack.currentFrame->max = rep_max[c]; /* zero for max => infinity */
954 if (stack.currentFrame->max == 0)
955 stack.currentFrame->max = INT_MAX;
960 minimize = (*stack.currentFrame->ecode == OP_CRMINRANGE);
961 min = GET2(stack.currentFrame->ecode, 1);
962 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 3);
963 if (stack.currentFrame->max == 0)
964 stack.currentFrame->max = INT_MAX;
965 stack.currentFrame->ecode += 5;
968 default: /* No repeat follows */
969 min = stack.currentFrame->max = 1;
973 /* First, ensure the minimum number of matches are present. */
975 for (i = 1; i <= min; i++) {
976 if (stack.currentFrame->eptr >= md->end_subject)
978 GETCHARINC(c, stack.currentFrame->eptr);
980 if (stack.currentFrame->data[-1] == OP_CLASS)
983 if ((stack.currentFrame->data[c/8] & (1 << (c&7))) == 0)
988 /* If max == min we can continue with the main loop without the
991 if (min == stack.currentFrame->max)
994 /* If minimizing, keep testing the rest of the expression and advancing
995 the pointer while it matches the class. */
998 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
999 RMATCH(22, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1002 if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1004 GETCHARINC(c, stack.currentFrame->eptr);
1006 if (stack.currentFrame->data[-1] == OP_CLASS)
1009 if ((stack.currentFrame->data[c/8] & (1 << (c&7))) == 0)
1014 ASSERT_NOT_REACHED();
1016 /* If maximizing, find the longest possible run, then work backwards. */
1018 stack.currentFrame->pp = stack.currentFrame->eptr;
1020 for (i = min; i < stack.currentFrame->max; i++) {
1022 if (stack.currentFrame->eptr >= md->end_subject)
1024 GETCHARLEN(c, stack.currentFrame->eptr, len);
1026 if (stack.currentFrame->data[-1] == OP_CLASS)
1029 if ((stack.currentFrame->data[c/8] & (1 << (c&7))) == 0)
1032 stack.currentFrame->eptr += len;
1035 RMATCH(24, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1038 if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1039 break; /* Stop if tried at original pos */
1040 BACKCHAR(stack.currentFrame->eptr);
1045 ASSERT_NOT_REACHED();
1047 /* Match an extended character class. This opcode is encountered only
1048 in UTF-8 mode, because that's the only time it is compiled. */
1050 BEGIN_OPCODE(XCLASS):
1051 stack.currentFrame->data = stack.currentFrame->ecode + 1 + LINK_SIZE; /* Save for matching */
1052 stack.currentFrame->ecode += GET(stack.currentFrame->ecode, 1); /* Advance past the item */
1054 switch (*stack.currentFrame->ecode) {
1061 c = *stack.currentFrame->ecode++ - OP_CRSTAR;
1062 minimize = (c & 1) != 0;
1063 min = rep_min[c]; /* Pick up values from tables; */
1064 stack.currentFrame->max = rep_max[c]; /* zero for max => infinity */
1065 if (stack.currentFrame->max == 0)
1066 stack.currentFrame->max = INT_MAX;
1071 minimize = (*stack.currentFrame->ecode == OP_CRMINRANGE);
1072 min = GET2(stack.currentFrame->ecode, 1);
1073 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 3);
1074 if (stack.currentFrame->max == 0)
1075 stack.currentFrame->max = INT_MAX;
1076 stack.currentFrame->ecode += 5;
1079 default: /* No repeat follows */
1080 min = stack.currentFrame->max = 1;
1083 /* First, ensure the minimum number of matches are present. */
1085 for (i = 1; i <= min; i++) {
1086 if (stack.currentFrame->eptr >= md->end_subject)
1088 GETCHARINC(c, stack.currentFrame->eptr);
1089 if (!_pcre_xclass(c, stack.currentFrame->data))
1093 /* If max == min we can continue with the main loop without the
1096 if (min == stack.currentFrame->max)
1099 /* If minimizing, keep testing the rest of the expression and advancing
1100 the pointer while it matches the class. */
1103 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1104 RMATCH(26, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1107 if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1109 GETCHARINC(c, stack.currentFrame->eptr);
1110 if (!_pcre_xclass(c, stack.currentFrame->data))
1113 ASSERT_NOT_REACHED();
1116 /* If maximizing, find the longest possible run, then work backwards. */
1119 stack.currentFrame->pp = stack.currentFrame->eptr;
1120 for (i = min; i < stack.currentFrame->max; i++) {
1122 if (stack.currentFrame->eptr >= md->end_subject)
1124 GETCHARLEN(c, stack.currentFrame->eptr, len);
1125 if (!_pcre_xclass(c, stack.currentFrame->data))
1127 stack.currentFrame->eptr += len;
1130 RMATCH(27, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1133 if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1134 break; /* Stop if tried at original pos */
1135 BACKCHAR(stack.currentFrame->eptr)
1140 ASSERT_NOT_REACHED();
1142 /* Match a single character, casefully */
1145 stack.currentFrame->length = 1;
1146 stack.currentFrame->ecode++;
1147 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);
1150 stack.currentFrame->ecode += stack.currentFrame->length;
1151 switch (md->end_subject - stack.currentFrame->eptr) {
1155 dc = *stack.currentFrame->eptr++;
1156 if (IS_LEADING_SURROGATE(dc))
1160 GETCHARINC(dc, stack.currentFrame->eptr);
1162 if (stack.currentFrame->fc != dc)
1167 /* Match a single character, caselessly */
1169 BEGIN_OPCODE(CHARNC):
1170 stack.currentFrame->length = 1;
1171 stack.currentFrame->ecode++;
1172 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);
1174 if (md->end_subject - stack.currentFrame->eptr == 0)
1179 if (md->end_subject - stack.currentFrame->eptr == 1) {
1180 dc = *stack.currentFrame->eptr++;
1181 if (IS_LEADING_SURROGATE(dc))
1184 GETCHARINC(dc, stack.currentFrame->eptr);
1185 stack.currentFrame->ecode += stack.currentFrame->length;
1187 /* If we have Unicode property support, we can use it to test the other
1188 case of the character, if there is one. */
1190 if (stack.currentFrame->fc != dc) {
1191 if (dc != _pcre_ucp_othercase(stack.currentFrame->fc))
1197 /* Match a single ASCII character. */
1199 BEGIN_OPCODE(ASCII_CHAR):
1200 if (md->end_subject == stack.currentFrame->eptr)
1202 if (*stack.currentFrame->eptr != stack.currentFrame->ecode[1])
1204 ++stack.currentFrame->eptr;
1205 stack.currentFrame->ecode += 2;
1208 /* Match one of two cases of an ASCII character. */
1210 BEGIN_OPCODE(ASCII_LETTER_NC):
1211 if (md->end_subject == stack.currentFrame->eptr)
1213 if ((*stack.currentFrame->eptr | 0x20) != stack.currentFrame->ecode[1])
1215 ++stack.currentFrame->eptr;
1216 stack.currentFrame->ecode += 2;
1219 /* Match a single character repeatedly; different opcodes share code. */
1221 BEGIN_OPCODE(EXACT):
1222 min = stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1224 stack.currentFrame->ecode += 3;
1228 BEGIN_OPCODE(MINUPTO):
1230 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1231 minimize = *stack.currentFrame->ecode == OP_MINUPTO;
1232 stack.currentFrame->ecode += 3;
1236 BEGIN_OPCODE(MINSTAR):
1238 BEGIN_OPCODE(MINPLUS):
1239 BEGIN_OPCODE(QUERY):
1240 BEGIN_OPCODE(MINQUERY):
1241 c = *stack.currentFrame->ecode++ - OP_STAR;
1242 minimize = (c & 1) != 0;
1243 min = rep_min[c]; /* Pick up values from tables; */
1244 stack.currentFrame->max = rep_max[c]; /* zero for max => infinity */
1245 if (stack.currentFrame->max == 0)
1246 stack.currentFrame->max = INT_MAX;
1248 /* Common code for all repeated single-character matches. We can give
1249 up quickly if there are fewer than the minimum number of characters left in
1254 stack.currentFrame->length = 1;
1255 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);
1256 if (min * (stack.currentFrame->fc > 0xFFFF ? 2 : 1) > md->end_subject - stack.currentFrame->eptr)
1258 stack.currentFrame->ecode += stack.currentFrame->length;
1260 if (stack.currentFrame->fc <= 0xFFFF) {
1261 int othercase = md->caseless ? _pcre_ucp_othercase(stack.currentFrame->fc) : -1;
1263 for (i = 1; i <= min; i++) {
1264 if (*stack.currentFrame->eptr != stack.currentFrame->fc && *stack.currentFrame->eptr != othercase)
1266 ++stack.currentFrame->eptr;
1269 if (min == stack.currentFrame->max)
1273 stack.currentFrame->repeat_othercase = othercase;
1274 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1275 RMATCH(28, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1278 if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1280 if (*stack.currentFrame->eptr != stack.currentFrame->fc && *stack.currentFrame->eptr != stack.currentFrame->repeat_othercase)
1282 ++stack.currentFrame->eptr;
1284 ASSERT_NOT_REACHED();
1286 stack.currentFrame->pp = stack.currentFrame->eptr;
1287 for (i = min; i < stack.currentFrame->max; i++) {
1288 if (stack.currentFrame->eptr >= md->end_subject)
1290 if (*stack.currentFrame->eptr != stack.currentFrame->fc && *stack.currentFrame->eptr != othercase)
1292 ++stack.currentFrame->eptr;
1294 while (stack.currentFrame->eptr >= stack.currentFrame->pp) {
1295 RMATCH(29, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1298 --stack.currentFrame->eptr;
1302 ASSERT_NOT_REACHED();
1304 /* No case on surrogate pairs, so no need to bother with "othercase". */
1306 for (i = 1; i <= min; i++) {
1308 GETCHAR(nc, stack.currentFrame->eptr);
1309 if (nc != stack.currentFrame->fc)
1311 stack.currentFrame->eptr += 2;
1314 if (min == stack.currentFrame->max)
1318 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1320 RMATCH(30, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1323 if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1325 GETCHAR(nc, stack.currentFrame->eptr);
1326 if (*stack.currentFrame->eptr != stack.currentFrame->fc)
1328 stack.currentFrame->eptr += 2;
1330 ASSERT_NOT_REACHED();
1332 stack.currentFrame->pp = stack.currentFrame->eptr;
1333 for (i = min; i < stack.currentFrame->max; i++) {
1335 if (stack.currentFrame->eptr > md->end_subject - 2)
1337 GETCHAR(nc, stack.currentFrame->eptr);
1338 if (*stack.currentFrame->eptr != stack.currentFrame->fc)
1340 stack.currentFrame->eptr += 2;
1342 while (stack.currentFrame->eptr >= stack.currentFrame->pp) {
1343 RMATCH(31, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1346 stack.currentFrame->eptr -= 2;
1350 ASSERT_NOT_REACHED();
1352 ASSERT_NOT_REACHED();
1354 /* Match a negated single one-byte character. The character we are
1355 checking can be multibyte. */
1358 if (stack.currentFrame->eptr >= md->end_subject)
1360 stack.currentFrame->ecode++;
1361 GETCHARINCTEST(c, stack.currentFrame->eptr);
1365 if (md->lcc[*stack.currentFrame->ecode++] == c)
1368 if (*stack.currentFrame->ecode++ == c)
1373 /* Match a negated single one-byte character repeatedly. This is almost a
1374 repeat of the code for a repeated single character, but I haven't found a
1375 nice way of commoning these up that doesn't require a test of the
1376 positive/negative option for each character match. Maybe that wouldn't add
1377 very much to the time taken, but character matching *is* what this is all
1380 BEGIN_OPCODE(NOTEXACT):
1381 min = stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1383 stack.currentFrame->ecode += 3;
1386 BEGIN_OPCODE(NOTUPTO):
1387 BEGIN_OPCODE(NOTMINUPTO):
1389 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1390 minimize = *stack.currentFrame->ecode == OP_NOTMINUPTO;
1391 stack.currentFrame->ecode += 3;
1394 BEGIN_OPCODE(NOTSTAR):
1395 BEGIN_OPCODE(NOTMINSTAR):
1396 BEGIN_OPCODE(NOTPLUS):
1397 BEGIN_OPCODE(NOTMINPLUS):
1398 BEGIN_OPCODE(NOTQUERY):
1399 BEGIN_OPCODE(NOTMINQUERY):
1400 c = *stack.currentFrame->ecode++ - OP_NOTSTAR;
1401 minimize = (c & 1) != 0;
1402 min = rep_min[c]; /* Pick up values from tables; */
1403 stack.currentFrame->max = rep_max[c]; /* zero for max => infinity */
1404 if (stack.currentFrame->max == 0) stack.currentFrame->max = INT_MAX;
1406 /* Common code for all repeated single-byte matches. We can give up quickly
1407 if there are fewer than the minimum number of bytes left in the
1411 if (min > md->end_subject - stack.currentFrame->eptr)
1413 stack.currentFrame->fc = *stack.currentFrame->ecode++;
1415 /* The code is duplicated for the caseless and caseful cases, for speed,
1416 since matching characters is likely to be quite common. First, ensure the
1417 minimum number of matches are present. If min = max, continue at the same
1418 level without recursing. Otherwise, if minimizing, keep trying the rest of
1419 the expression and advancing one matching character if failing, up to the
1420 maximum. Alternatively, if maximizing, find the maximum number of
1421 characters and work backwards. */
1423 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->fc, min, stack.currentFrame->max));
1426 if (stack.currentFrame->fc < 128)
1427 stack.currentFrame->fc = md->lcc[stack.currentFrame->fc];
1431 for (i = 1; i <= min; i++) {
1432 GETCHARINC(d, stack.currentFrame->eptr);
1435 if (stack.currentFrame->fc == d)
1440 if (min == stack.currentFrame->max)
1445 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1446 RMATCH(38, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1449 GETCHARINC(d, stack.currentFrame->eptr);
1452 if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject || stack.currentFrame->fc == d)
1455 ASSERT_NOT_REACHED();
1461 stack.currentFrame->pp = stack.currentFrame->eptr;
1465 for (i = min; i < stack.currentFrame->max; i++) {
1467 if (stack.currentFrame->eptr >= md->end_subject)
1469 GETCHARLEN(d, stack.currentFrame->eptr, len);
1472 if (stack.currentFrame->fc == d)
1474 stack.currentFrame->eptr += len;
1477 RMATCH(40, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1480 if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1481 break; /* Stop if tried at original pos */
1482 BACKCHAR(stack.currentFrame->eptr);
1488 ASSERT_NOT_REACHED();
1491 /* Caseful comparisons */
1496 for (i = 1; i <= min; i++) {
1497 GETCHARINC(d, stack.currentFrame->eptr);
1498 if (stack.currentFrame->fc == d)
1503 if (min == stack.currentFrame->max)
1508 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1509 RMATCH(42, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1512 GETCHARINC(d, stack.currentFrame->eptr);
1513 if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject || stack.currentFrame->fc == d)
1516 ASSERT_NOT_REACHED();
1522 stack.currentFrame->pp = stack.currentFrame->eptr;
1526 for (i = min; i < stack.currentFrame->max; i++) {
1528 if (stack.currentFrame->eptr >= md->end_subject)
1530 GETCHARLEN(d, stack.currentFrame->eptr, len);
1531 if (stack.currentFrame->fc == d)
1533 stack.currentFrame->eptr += len;
1536 RMATCH(44, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1539 if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1540 break; /* Stop if tried at original pos */
1541 BACKCHAR(stack.currentFrame->eptr);
1548 ASSERT_NOT_REACHED();
1550 /* Match a single character type repeatedly; several different opcodes
1551 share code. This is very similar to the code for single characters, but we
1552 repeat it in the interests of efficiency. */
1554 BEGIN_OPCODE(TYPEEXACT):
1555 min = stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1557 stack.currentFrame->ecode += 3;
1560 BEGIN_OPCODE(TYPEUPTO):
1561 BEGIN_OPCODE(TYPEMINUPTO):
1563 stack.currentFrame->max = GET2(stack.currentFrame->ecode, 1);
1564 minimize = *stack.currentFrame->ecode == OP_TYPEMINUPTO;
1565 stack.currentFrame->ecode += 3;
1568 BEGIN_OPCODE(TYPESTAR):
1569 BEGIN_OPCODE(TYPEMINSTAR):
1570 BEGIN_OPCODE(TYPEPLUS):
1571 BEGIN_OPCODE(TYPEMINPLUS):
1572 BEGIN_OPCODE(TYPEQUERY):
1573 BEGIN_OPCODE(TYPEMINQUERY):
1574 c = *stack.currentFrame->ecode++ - OP_TYPESTAR;
1575 minimize = (c & 1) != 0;
1576 min = rep_min[c]; /* Pick up values from tables; */
1577 stack.currentFrame->max = rep_max[c]; /* zero for max => infinity */
1578 if (stack.currentFrame->max == 0)
1579 stack.currentFrame->max = INT_MAX;
1581 /* Common code for all repeated single character type matches. Note that
1582 in UTF-8 mode, '.' matches a character of any length, but for the other
1583 character types, the valid characters are all one-byte long. */
1586 stack.currentFrame->ctype = *stack.currentFrame->ecode++; /* Code for the character type */
1588 /* First, ensure the minimum number of matches are present. Use inline
1589 code for maximizing the speed, and do the type test once at the start
1590 (i.e. keep it out of the loop). Also we can test that there are at least
1591 the minimum number of bytes before we start. This isn't as effective in
1592 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
1593 is tidier. Also separate the UCP code, which can be the same for both UTF-8
1594 and single-bytes. */
1596 if (min > md->end_subject - stack.currentFrame->eptr)
1599 switch(stack.currentFrame->ctype) {
1601 for (i = 1; i <= min; i++) {
1602 if (stack.currentFrame->eptr >= md->end_subject || isNewline(*stack.currentFrame->eptr))
1604 ++stack.currentFrame->eptr;
1605 while (stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr))
1606 stack.currentFrame->eptr++;
1611 for (i = 1; i <= min; i++) {
1612 if (stack.currentFrame->eptr >= md->end_subject)
1614 GETCHARINC(c, stack.currentFrame->eptr);
1615 if (isASCIIDigit(c))
1621 for (i = 1; i <= min; i++) {
1622 if (stack.currentFrame->eptr >= md->end_subject || !isASCIIDigit(*stack.currentFrame->eptr++))
1624 /* No need to skip more bytes - we know it's a 1-byte character */
1628 case OP_NOT_WHITESPACE:
1629 for (i = 1; i <= min; i++) {
1630 if (stack.currentFrame->eptr >= md->end_subject ||
1631 (*stack.currentFrame->eptr < 128 && (md->ctypes[*stack.currentFrame->eptr] & ctype_space) != 0))
1633 while (++stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr));
1638 for (i = 1; i <= min; i++) {
1639 if (stack.currentFrame->eptr >= md->end_subject ||
1640 *stack.currentFrame->eptr >= 128 || (md->ctypes[*stack.currentFrame->eptr++] & ctype_space) == 0)
1642 /* No need to skip more bytes - we know it's a 1-byte character */
1646 case OP_NOT_WORDCHAR:
1647 for (i = 1; i <= min; i++) {
1648 if (stack.currentFrame->eptr >= md->end_subject ||
1649 (*stack.currentFrame->eptr < 128 && (md->ctypes[*stack.currentFrame->eptr] & ctype_word) != 0))
1651 while (++stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr));
1656 for (i = 1; i <= min; i++) {
1657 if (stack.currentFrame->eptr >= md->end_subject ||
1658 *stack.currentFrame->eptr >= 128 || (md->ctypes[*stack.currentFrame->eptr++] & ctype_word) == 0)
1660 /* No need to skip more bytes - we know it's a 1-byte character */
1665 ASSERT_NOT_REACHED();
1666 RRETURN_ERROR(JSRegExpErrorInternal);
1667 } /* End switch(stack.currentFrame->ctype) */
1670 /* If min = max, continue at the same level without recursing */
1672 if (min == stack.currentFrame->max)
1675 /* If minimizing, we have to test the rest of the pattern before each
1676 subsequent match. */
1679 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) {
1680 RMATCH(48, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1683 if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject)
1686 GETCHARINC(c, stack.currentFrame->eptr);
1687 switch(stack.currentFrame->ctype) {
1694 if (isASCIIDigit(c))
1699 if (!isASCIIDigit(c))
1703 case OP_NOT_WHITESPACE:
1704 if (c < 128 && (md->ctypes[c] & ctype_space))
1709 if (c >= 128 || !(md->ctypes[c] & ctype_space))
1713 case OP_NOT_WORDCHAR:
1714 if (c < 128 && (md->ctypes[c] & ctype_word))
1719 if (c >= 128 || !(md->ctypes[c] & ctype_word))
1724 ASSERT_NOT_REACHED();
1725 RRETURN_ERROR(JSRegExpErrorInternal);
1728 ASSERT_NOT_REACHED();
1731 /* If maximizing it is worth using inline code for speed, doing the type
1732 test once at the start (i.e. keep it out of the loop). */
1735 stack.currentFrame->pp = stack.currentFrame->eptr; /* Remember where we started */
1737 switch(stack.currentFrame->ctype) {
1740 /* Special code is required for UTF8, but when the maximum is unlimited
1741 we don't need it, so we repeat the non-UTF8 code. This is probably
1742 worth it, because .* is quite a common idiom. */
1744 if (stack.currentFrame->max < INT_MAX) {
1745 for (i = min; i < stack.currentFrame->max; i++) {
1746 if (stack.currentFrame->eptr >= md->end_subject || isNewline(*stack.currentFrame->eptr))
1748 stack.currentFrame->eptr++;
1749 while (stack.currentFrame->eptr < md->end_subject && (*stack.currentFrame->eptr & 0xc0) == 0x80)
1750 stack.currentFrame->eptr++;
1754 /* Handle unlimited UTF-8 repeat */
1757 for (i = min; i < stack.currentFrame->max; i++) {
1758 if (stack.currentFrame->eptr >= md->end_subject || isNewline(*stack.currentFrame->eptr))
1760 stack.currentFrame->eptr++;
1767 for (i = min; i < stack.currentFrame->max; i++) {
1769 if (stack.currentFrame->eptr >= md->end_subject)
1771 GETCHARLEN(c, stack.currentFrame->eptr, len);
1772 if (isASCIIDigit(c))
1774 stack.currentFrame->eptr+= len;
1779 for (i = min; i < stack.currentFrame->max; i++) {
1781 if (stack.currentFrame->eptr >= md->end_subject)
1783 GETCHARLEN(c, stack.currentFrame->eptr, len);
1784 if (!isASCIIDigit(c))
1786 stack.currentFrame->eptr+= len;
1790 case OP_NOT_WHITESPACE:
1791 for (i = min; i < stack.currentFrame->max; i++) {
1793 if (stack.currentFrame->eptr >= md->end_subject)
1795 GETCHARLEN(c, stack.currentFrame->eptr, len);
1796 if (c < 128 && (md->ctypes[c] & ctype_space))
1798 stack.currentFrame->eptr+= len;
1803 for (i = min; i < stack.currentFrame->max; i++) {
1805 if (stack.currentFrame->eptr >= md->end_subject)
1807 GETCHARLEN(c, stack.currentFrame->eptr, len);
1808 if (c >= 128 || !(md->ctypes[c] & ctype_space))
1810 stack.currentFrame->eptr+= len;
1814 case OP_NOT_WORDCHAR:
1815 for (i = min; i < stack.currentFrame->max; i++) {
1817 if (stack.currentFrame->eptr >= md->end_subject)
1819 GETCHARLEN(c, stack.currentFrame->eptr, len);
1820 if (c < 128 && (md->ctypes[c] & ctype_word))
1822 stack.currentFrame->eptr+= len;
1827 for (i = min; i < stack.currentFrame->max; i++) {
1829 if (stack.currentFrame->eptr >= md->end_subject)
1831 GETCHARLEN(c, stack.currentFrame->eptr, len);
1832 if (c >= 128 || !(md->ctypes[c] & ctype_word))
1834 stack.currentFrame->eptr+= len;
1839 ASSERT_NOT_REACHED();
1840 RRETURN_ERROR(JSRegExpErrorInternal);
1843 /* stack.currentFrame->eptr is now past the end of the maximum run */
1846 RMATCH(52, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0);
1849 if (stack.currentFrame->eptr-- == stack.currentFrame->pp)
1850 break; /* Stop if tried at original pos */
1851 BACKCHAR(stack.currentFrame->eptr);
1854 /* Get here if we can't make it match with any permitted repetitions */
1858 ASSERT_NOT_REACHED();
1860 BEGIN_OPCODE(CRMINPLUS):
1861 BEGIN_OPCODE(CRMINQUERY):
1862 BEGIN_OPCODE(CRMINRANGE):
1863 BEGIN_OPCODE(CRMINSTAR):
1864 BEGIN_OPCODE(CRPLUS):
1865 BEGIN_OPCODE(CRQUERY):
1866 BEGIN_OPCODE(CRRANGE):
1867 BEGIN_OPCODE(CRSTAR):
1868 ASSERT_NOT_REACHED();
1869 RRETURN_ERROR(JSRegExpErrorInternal);
1871 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1876 /* Opening capturing bracket. If there is space in the offset vector, save
1877 the current subject position in the working slot at the top of the vector. We
1878 mustn't change the current values of the data slot, because they may be set
1879 from a previous iteration of this group, and be referred to by a reference
1882 If the bracket fails to match, we need to restore this value and also the
1883 values of the final offsets, in case they were set by a previous iteration of
1886 If there isn't enough space in the offset vector, treat this as if it were a
1887 non-capturing bracket. Don't worry about setting the flag for the error case
1888 here; that is handled in the code for KET. */
1890 ASSERT(*stack.currentFrame->ecode > OP_BRA);
1892 stack.currentFrame->number = *stack.currentFrame->ecode - OP_BRA;
1894 /* For extended extraction brackets (large number), we have to fish out the
1895 number from a dummy opcode at the start. */
1897 if (stack.currentFrame->number > EXTRACT_BASIC_MAX)
1898 stack.currentFrame->number = GET2(stack.currentFrame->ecode, 2+LINK_SIZE);
1899 stack.currentFrame->offset = stack.currentFrame->number << 1;
1902 printf("start bracket %d subject=", stack.currentFrame->number);
1903 pchars(stack.currentFrame->eptr, 16, true, md);
1907 if (stack.currentFrame->offset < md->offset_max) {
1908 stack.currentFrame->save_offset1 = md->offset_vector[stack.currentFrame->offset];
1909 stack.currentFrame->save_offset2 = md->offset_vector[stack.currentFrame->offset + 1];
1910 stack.currentFrame->save_offset3 = md->offset_vector[md->offset_end - stack.currentFrame->number];
1912 DPRINTF(("saving %d %d %d\n", stack.currentFrame->save_offset1, stack.currentFrame->save_offset2, stack.currentFrame->save_offset3));
1913 md->offset_vector[md->offset_end - stack.currentFrame->number] = stack.currentFrame->eptr - md->start_subject;
1916 RMATCH(1, stack.currentFrame->ecode + 1 + LINK_SIZE, stack.currentFrame->eptrb, match_isgroup);
1917 if (is_match) RRETURN;
1918 stack.currentFrame->ecode += GET(stack.currentFrame->ecode, 1);
1919 } while (*stack.currentFrame->ecode == OP_ALT);
1921 DPRINTF(("bracket %d failed\n", stack.currentFrame->number));
1923 md->offset_vector[stack.currentFrame->offset] = stack.currentFrame->save_offset1;
1924 md->offset_vector[stack.currentFrame->offset + 1] = stack.currentFrame->save_offset2;
1925 md->offset_vector[md->offset_end - stack.currentFrame->number] = stack.currentFrame->save_offset3;
1930 /* Insufficient room for saving captured contents */
1932 goto NON_CAPTURING_BRACKET;
1935 /* Do not stick any code in here without much thought; it is assumed
1936 that "continue" in the code above comes out to here to repeat the main
1939 } /* End of main loop */
1941 ASSERT_NOT_REACHED();
1943 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1946 switch (stack.currentFrame->where)
1948 case 0: goto RETURN;
1949 case 1: goto RRETURN_1;
1950 case 2: goto RRETURN_2;
1951 case 6: goto RRETURN_6;
1952 case 7: goto RRETURN_7;
1953 case 9: goto RRETURN_9;
1954 case 10: goto RRETURN_10;
1955 case 11: goto RRETURN_11;
1956 case 12: goto RRETURN_12;
1957 case 13: goto RRETURN_13;
1958 case 14: goto RRETURN_14;
1959 case 15: goto RRETURN_15;
1960 case 16: goto RRETURN_16;
1961 case 17: goto RRETURN_17;
1962 case 18: goto RRETURN_18;
1963 case 19: goto RRETURN_19;
1964 case 20: goto RRETURN_20;
1965 case 21: goto RRETURN_21;
1966 case 22: goto RRETURN_22;
1967 case 24: goto RRETURN_24;
1968 case 26: goto RRETURN_26;
1969 case 27: goto RRETURN_27;
1970 case 28: goto RRETURN_28;
1971 case 29: goto RRETURN_29;
1972 case 30: goto RRETURN_30;
1973 case 31: goto RRETURN_31;
1974 case 38: goto RRETURN_38;
1975 case 40: goto RRETURN_40;
1976 case 42: goto RRETURN_42;
1977 case 44: goto RRETURN_44;
1978 case 48: goto RRETURN_48;
1979 case 52: goto RRETURN_52;
1983 RRETURN_ERROR(JSRegExpErrorInternal);
1988 return is_match ? MATCH_MATCH : MATCH_NOMATCH;
1992 /*************************************************
1993 * Execute a Regular Expression *
1994 *************************************************/
1996 /* This function applies a compiled re to a subject string and picks out
1997 portions of the string if it matches. Two elements in the vector are set for
1998 each substring: the offsets to the start and end of the substring.
2001 argument_re points to the compiled expression
2002 extra_data (stale entry carried over from PCRE's pcre_exec; jsRegExpExecute takes no such argument)
2003 subject points to the subject string
2004 length length of subject string (may contain binary zeros)
2005 start_offset where to start in the subject string
2007 offsets points to a vector of ints to be filled in with offsets
2008 offsetcount the number of elements in the vector
2010 Returns: > 0 => success; value is the number of elements filled in
2011 = 0 => success, but offsets is not big enough
2012 -1 => failed to match
2013 < -1 => some kind of unexpected problem
2017 jsRegExpExecute(const pcre *argument_re,
2018 const UChar* subject, int length, int start_offset, int *offsets,
2021 int rc, resetcount, ocount;
2022 int first_byte = -1;
2025 BOOL using_temporary_offsets = false;
2026 BOOL first_byte_caseless = false;
2028 BOOL req_byte_caseless = false;
2029 match_data match_block;
2030 USPTR start_match = (USPTR)subject + start_offset;
2032 USPTR req_byte_ptr = start_match - 1;
2033 const uschar *start_code;
2035 const real_pcre *external_re = (const real_pcre *)argument_re;
2036 const real_pcre *re = external_re;
2038 /* Plausibility checks */
2042 ASSERT(offsetcount >= 0);
2043 ASSERT(offsets || offsetcount == 0);
2045 /* Set up other data */
2047 startline = (re->options & PCRE_STARTLINE) != 0;
2049 /* The code starts after the real_pcre block and the capture name table. */
2051 start_code = (const uschar *)(external_re + 1);
2053 match_block.start_subject = (USPTR)subject;
2054 match_block.end_subject = match_block.start_subject + length;
2055 end_subject = match_block.end_subject;
2057 match_block.lcc = _pcre_default_tables + lcc_offset;
2058 match_block.ctypes = _pcre_default_tables + ctypes_offset;
2060 match_block.multiline = (re->options & PCRE_MULTILINE) != 0;
2061 match_block.caseless = (re->options & PCRE_CASELESS) != 0;
2063 /* If the expression has got more back references than the offsets supplied can
2064 hold, we get a temporary chunk of working store to use during the matching.
2065 Otherwise, we can use the vector supplied, rounding down its size to a multiple
2068 ocount = offsetcount - (offsetcount % 3);
2070 if (re->top_backref > 0 && re->top_backref >= ocount/3)
2072 ocount = re->top_backref * 3 + 3;
2073 match_block.offset_vector = new int[ocount];
2074 if (match_block.offset_vector == NULL) return JSRegExpErrorNoMemory;
2075 using_temporary_offsets = true;
2076 DPRINTF(("Got memory to hold back references\n"));
2078 else match_block.offset_vector = offsets;
2080 match_block.offset_end = ocount;
2081 match_block.offset_max = (2*ocount)/3;
2082 match_block.offset_overflow = false;
2084 /* Compute the minimum number of offsets that we need to reset each time. Doing
2085 this makes a huge difference to execution time when there aren't many brackets
2088 resetcount = 2 + re->top_bracket * 2;
2089 if (resetcount > offsetcount) resetcount = ocount;
2091 /* Reset the working variable associated with each extraction. These should
2092 never be used unless previously set, but they get saved and restored, and so we
2093 initialize them to avoid reading uninitialized locations. */
2095 if (match_block.offset_vector != NULL)
2097 register int *iptr = match_block.offset_vector + ocount;
2098 register int *iend = iptr - resetcount/2 + 1;
2099 while (--iptr >= iend) *iptr = -1;
2102 /* Set up the first character to match, if available. The first_byte value is
2103 never set for an anchored regular expression, but the anchoring may be forced
2104 at run time, so we have to test for anchoring. The first char may be unset for
2105 an unanchored pattern, of course. If there's no first char and the pattern was
2106 studied, there may be a bitmap of possible first characters. */
2108 if ((re->options & PCRE_FIRSTSET) != 0)
2110 first_byte = re->first_byte & 255;
2111 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == true)
2112 first_byte = match_block.lcc[first_byte];
2115 /* For anchored or unanchored matches, there may be a "last known required
2118 if ((re->options & PCRE_REQCHSET) != 0)
2120 req_byte = re->req_byte & 255;
2121 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2122 req_byte2 = (_pcre_default_tables + fcc_offset)[req_byte]; /* case flipped */
2125 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
2126 the loop runs just once. */
2130 USPTR save_end_subject = end_subject;
2132 /* Reset the maximum number of extractions we might see. */
2134 if (match_block.offset_vector != NULL)
2136 register int *iptr = match_block.offset_vector;
2137 register int *iend = iptr + resetcount;
2138 while (iptr < iend) *iptr++ = -1;
2141 /* Advance to a unique first char if possible. If firstline is true, the
2142 start of the match is constrained to the first line of a multiline string.
2143 Implement this by temporarily adjusting end_subject so that we stop scanning
2144 at a newline. If the match fails at the newline, later code breaks this loop.
2147 /* Now test for a unique first byte */
2149 if (first_byte >= 0)
2151 pcre_uchar first_char = first_byte;
2152 if (first_byte_caseless)
2153 while (start_match < end_subject)
2155 int sm = *start_match;
2158 if (match_block.lcc[sm] == first_char)
2163 while (start_match < end_subject && *start_match != first_char)
2167 /* Or to just after \n for a multiline match if possible */
2171 if (start_match > match_block.start_subject + start_offset)
2173 while (start_match < end_subject && !isNewline(start_match[-1]))
2178 /* Restore fudged end_subject */
2180 end_subject = save_end_subject;
2182 #ifdef DEBUG /* Sigh. Some compilers never learn. */
2183 printf(">>>> Match against: ");
2184 pchars(start_match, end_subject - start_match, true, &match_block);
2188 /* If req_byte is set, we know that that character must appear in the subject
2189 for the match to succeed. If the first character is set, req_byte must be
2190 later in the subject; otherwise the test starts at the match point. This
2191 optimization can save a huge amount of backtracking in patterns with nested
2192 unlimited repeats that aren't going to match. Writing separate code for
2193 cased/caseless versions makes it go faster, as does using an autoincrement
2194 and backing off on a match.
2196 HOWEVER: when the subject string is very, very long, searching to its end can
2197 take a long time, and give bad performance on quite ordinary patterns. This
2198 showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2199 don't do this when the string is sufficiently long.
2201 ALSO: this processing is disabled when partial matching is requested. */
2204 if (req_byte >= 0 &&
2205 end_subject - start_match < REQ_BYTE_MAX)
2207 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
2209 /* We don't need to repeat the search if we haven't yet reached the
2210 place we found it at last time. */
2212 if (p > req_byte_ptr)
2214 if (req_byte_caseless)
2216 while (p < end_subject)
2218 register int pp = *p++;
2219 if (pp == req_byte || pp == req_byte2) { p--; break; }
2224 while (p < end_subject)
2226 if (*p++ == req_byte) { p--; break; }
2230 /* If we can't find the required character, break the matching loop */
2232 if (p >= end_subject) break;
2234 /* If we have found the required character, save the point where we
2235 found it, so that we don't search again next time round the loop if
2236 the start hasn't passed this character yet. */
2242 /* When a match occurs, substrings will be set for all internal extractions;
2243 we just need to set up the whole thing as substring 0 before returning. If
2244 there were too many extractions, set the return code to zero. In the case
2245 where we had to get some local store to hold offsets for backreferences, copy
2246 those back references that we can. In this case there need not be overflow
2247 if certain parts of the pattern were not used. */
2249 match_block.match_call_count = 0;
2251 rc = match(start_match, start_code, 2, &match_block);
2253 /* When the result is no match, if the subject's first character was a
2254 newline and the PCRE_FIRSTLINE option is set, break (which will return
2255 PCRE_ERROR_NOMATCH). The option requests that a match occur before the first
2256 newline in the subject. Otherwise, advance the pointer to the next character
2257 and continue - but the continuation will actually happen only when the
2258 pattern is not anchored. */
2260 if (rc == MATCH_NOMATCH)
2263 while(start_match < end_subject && ISMIDCHAR(*start_match))
2268 if (rc != MATCH_MATCH)
2270 DPRINTF((">>>> error: returning %d\n", rc));
2274 /* We have a match! Copy the offset information from temporary store if necessary. */
2277 if (using_temporary_offsets)
2279 if (offsetcount >= 4)
2281 memcpy(offsets + 2, match_block.offset_vector + 2,
2282 (offsetcount - 2) * sizeof(int));
2283 DPRINTF(("Copied offsets from temporary memory\n"));
2285 if (match_block.end_offset_top > offsetcount)
2286 match_block.offset_overflow = true;
2288 DPRINTF(("Freeing temporary memory\n"));
2289 delete [] match_block.offset_vector;
2292 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
2294 if (offsetcount < 2) rc = 0; else
2296 offsets[0] = start_match - match_block.start_subject;
2297 offsets[1] = match_block.end_match_ptr - match_block.start_subject;
2300 DPRINTF((">>>> returning %d\n", rc));
2304 /* This "while" is the end of the "do" above */
2306 while (start_match <= end_subject);
2308 if (using_temporary_offsets)
2310 DPRINTF(("Freeing temporary memory\n"));
2311 delete [] match_block.offset_vector;
2314 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2315 return JSRegExpErrorNoMatch;