1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
7 Originally written by Philip Hazel
8 Copyright (c) 1997-2006 University of Cambridge
9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10 Copyright (C) 2007 Eric Seidel <eric@webkit.org>
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
47 #include "pcre_internal.h"
49 #include <wtf/ASCIICType.h>
50 #include <wtf/Vector.h>
55 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
56 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
59 /* Avoid warnings on Windows. */
/* ReturnLocation names the point at which a "recursive" match resumes: a plain
   integer when the numbered-label/switch scheme is used, or a label address
   when the computed-goto scheme is enabled. */
63 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
64 typedef int ReturnLocation;
66 typedef void* ReturnLocation;
/* Resume point stored for this frame by pushNewFrame(). */
70 ReturnLocation returnLocation;
/* Frame that was current when this one was pushed; popCurrentFrame() makes it
   current again. */
71 struct MatchFrame* previousFrame;
73 /* Function arguments that may change */
/* Accessed as args.subjectPtr / args.instructionPtr / args.subpatternStart.
   pushNewFrame() inherits subjectPtr from the pushing frame and takes the
   other two from its caller. */
75 const UChar* subjectPtr;
76 const uschar* instructionPtr;
78 const UChar* subpatternStart;
82 /* PCRE uses "fake" recursion built off of gotos, thus
83 stack-based local variables are not safe to use. Instead we have to
84 store local variables on the current MatchFrame. */
/* Set by BRAZERO/BRAMINZERO to the bracket that follows the opcode. */
87 const uschar* startOfRepeatingBracket;
88 const UChar* subjectPtrAtStartOfInstruction; // Several instructions stash away a subjectPtr here for later compare
/* Despite the name, the KET handling also stores the start of an ordinary
   bracket group here, not only the start of a ONCE group. */
89 const uschar* instructionPtrAtStartOfOnce;
/* NOTE(review): presumably the locals.subpatternStart copy that
   startNewGroup() fills from args.subpatternStart - confirm. */
104 const UChar* subpatternStart;
108 /* Structure for passing "static" information around between the functions
109 doing traditional NFA matching, so that they are thread-safe. */
/* Layout of offset_vector as used by the closing-bracket (KET) handling:
   capture group n keeps its start offset at index 2n and its end offset at
   index 2n + 1, both relative to start_subject. The start value is copied
   from index offset_end - n when the group closes.
   NOTE(review): that upper slot is presumably filled when the group opens -
   confirm. */
112 int* offset_vector; /* Offset vector */
113 int offset_end; /* One past the end */
114 int offset_max; /* The maximum usable for return data */
/* Set by the KET handling when a group's index 2n is >= offset_max. */
115 bool offset_overflow; /* Set if too many extractions */
116 const UChar* start_subject; /* Start of the subject string */
117 const UChar* end_subject; /* End of the subject string */
/* The next two are written at the END opcode and when a KET closes an
   assertion or "once" group; the assertion/once handling reads them back. */
118 const UChar* end_match_ptr; /* Subject position at end match */
119 int end_offset_top; /* Highwater mark at end of match */
124 /* Non-error returns from the match() function. Error returns are externally
125 defined PCRE_ERROR_xxx codes, which are all negative. */
127 #define MATCH_MATCH 1
128 #define MATCH_NOMATCH 0
131 /*************************************************
132 * Debugging function to print chars *
133 *************************************************/
135 /* Print a sequence of chars in printable format, stopping at the end of the
136 subject if requested.
139 p points to characters
140 length number to print
141 is_subject true if printing from within md.start_subject
142 md pointer to matching data block, if is_subject is true
/* Debug helper: writes up to `length` characters starting at p using printf.
   When is_subject is true, length is first clamped so that output stops at
   md.end_subject. Characters for which isprint() is true are handled
   separately from the rest, which are written as hexadecimal escapes.
   NOTE(review): the test that selects between the two escape forms is not
   evident from the statements here - confirm before relying on it. */
145 static void pchars(const UChar* p, int length, bool is_subject, const MatchData& md)
/* Never read past the end of the subject. */
147 if (is_subject && length > md.end_subject - p)
148 length = md.end_subject - p;
149 while (length-- > 0) {
/* c receives the current character and p advances past it. */
151 if (isprint(c = *(p++)))
/* Two-digit hexadecimal escape. */
154 printf("\\x%02x", c);
/* Braced hexadecimal escape of arbitrary width. */
156 printf("\\x{%x}", c);
163 /*************************************************
164 * Match a back-reference *
165 *************************************************/
167 /* If a back reference hasn't been set, the caller passes a length of zero, so
168 no characters are compared and the reference matches the empty string.
171 offset index into the offset vector
172 subjectPtr points into the subject
173 length length to be matched
174 md points to match data block
176 Returns: true if matched
/* Compares `length` characters at subjectPtr with the text of a back
   reference, which starts at md.start_subject + md.offset_vector[offset].
   The comparison is rejected up front when fewer than `length` characters
   remain before md.end_subject. Two comparison loops follow: one that also
   accepts the other-case form of each referenced character (obtained from
   _pcre_ucp_othercase) and one that requires exact equality.
   NOTE(review): the condition choosing between the two loops is presumably a
   case-insensitivity flag on md - confirm. */
179 static bool match_ref(int offset, const UChar* subjectPtr, int length, const MatchData& md)
/* p walks the referenced (previously captured) text. */
181 const UChar* p = md.start_subject + md.offset_vector[offset];
/* Diagnostic output: show the subject text and the referenced text. */
184 if (subjectPtr >= md.end_subject)
185 printf("matching subject <null>");
187 printf("matching subject ");
188 pchars(subjectPtr, length, true, md);
190 printf(" against backref ");
191 pchars(p, length, false, md);
195 /* Always fail if not enough characters left */
197 if (length > md.end_subject - subjectPtr)
200 /* Separate the caseless case for speed */
203 while (length-- > 0) {
/* A subject character is acceptable if it equals the referenced character
   or that character's other-case counterpart. */
205 int othercase = _pcre_ucp_othercase(c);
206 UChar d = *subjectPtr++;
207 if (c != d && othercase != d)
/* Case-sensitive path: characters must be identical. */
213 if (*p++ != *subjectPtr++)
/* Macros that implement match()'s "recursion" on the explicit MatchStack.

   RMATCH_WHERE(num)  - the value stored as a frame's return location: the
                        number itself in the switch scheme, or the address of
                        label RRETURN_<num> in the computed-goto scheme.
   RRETURN_LABEL      - the goto target used to return: RRETURN_SWITCH in the
                        switch scheme, or the current frame's stored
                        returnLocation in the computed-goto scheme.
   RECURSIVE_MATCH(num, ra, rb)
                      - pushes a frame whose instruction pointer is ra and
                        whose subpattern start is rb, runs the nested match
                        through RECURSE_WITH_RETURN_NUMBER, then pops it.
   RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb)
                      - the same, but also calls startNewGroup() on the newly
                        pushed frame before recursing.
   RRETURN            - jumps to RRETURN_LABEL. */
220 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
222 /* Use numbered labels and switch statement at the bottom of the match function. */
224 #define RMATCH_WHERE(num) num
225 #define RRETURN_LABEL RRETURN_SWITCH
229 /* Use GCC's computed goto extension. */
231 /* For one test case this is more than 40% faster than the switch statement.
232 We could avoid the use of the num argument entirely by using local labels,
233 but using it for the GCC case as well as the non-GCC case allows us to share
234 a bit more code and notice if we use conflicting numbers.*/
236 #define RMATCH_WHERE(num) &&RRETURN_##num
237 #define RRETURN_LABEL *stack.currentFrame->returnLocation
/* Abort the whole match with JSRegExpErrorRecursionLimit once stack.size
   reaches MATCH_LIMIT_RECURSION; matchError() unwinds the frames first. */
241 #define CHECK_RECURSION_LIMIT \
242 if (stack.size >= MATCH_LIMIT_RECURSION) \
243 return matchError(JSRegExpErrorRecursionLimit, stack);
/* Every nested match checks the recursion limit before it proceeds. */
245 #define RECURSE_WITH_RETURN_NUMBER(num) \
246 CHECK_RECURSION_LIMIT \
250 #define RECURSIVE_MATCH(num, ra, rb) \
252 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
253 RECURSE_WITH_RETURN_NUMBER(num) \
254 stack.popCurrentFrame(); \
257 #define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
259 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
260 startNewGroup(stack.currentFrame); \
261 RECURSE_WITH_RETURN_NUMBER(num) \
262 stack.popCurrentFrame(); \
265 #define RRETURN goto RRETURN_LABEL
267 #define RRETURN_NO_MATCH \
273 /*************************************************
274 * Match from current position *
275 *************************************************/
277 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
278 in the subject string, while subpatternStart holds the value of subjectPtr at the start of the
279 last bracketed group - used for breaking infinite loops matching zero-length
280 strings. This function is called recursively in many circumstances. Whenever it
281 returns a negative (error) response, the outer match() call must also return the
285 subjectPtr pointer in subject
286 instructionPtr position in code
287 offset_top current top pointer
288 md pointer to "static" info for the match
290 Returns: MATCH_MATCH if matched ) these values are >= 0
291 MATCH_NOMATCH if failed to match )
292 a negative PCRE_ERROR_xxx value if aborted by an error condition
293 (e.g. stopped by repeated call or recursion limit)
/* MatchStack holds the explicit frames that stand in for native recursion.
   Frames come from the embedded `frames` array while
   canUseStackBufferForNextFrame() holds; after that allocateNextFrame()
   falls back to `new MatchFrame`. */
296 static const unsigned FRAMES_ON_STACK = 16;
/* Constructor initialisers: framesEnd is one past the embedded array and the
   first embedded frame starts out as the current frame. */
300 : framesEnd(frames + FRAMES_ON_STACK)
301 , currentFrame(frames)
302 , size(1) // match() accesses the first frame w/o calling pushNewFrame
/* Sanity check that the array really has FRAMES_ON_STACK elements. */
304 ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
307 MatchFrame frames[FRAMES_ON_STACK];
308 MatchFrame* framesEnd;
309 MatchFrame* currentFrame;
/* True while size is still below FRAMES_ON_STACK, i.e. the next frame can be
   taken from the embedded array. */
312 inline bool canUseStackBufferForNextFrame()
314 return size < FRAMES_ON_STACK;
/* Returns storage for the next frame: the slot directly after currentFrame
   while the embedded array has room, otherwise a heap allocation. */
317 inline MatchFrame* allocateNextFrame()
319 if (canUseStackBufferForNextFrame())
320 return currentFrame + 1;
321 return new MatchFrame;
/* Pushes a frame for a nested match and makes it current. subjectPtr and
   offset_top are inherited from the current frame; instructionPtr,
   subpatternStart and returnLocation come from the caller.
   NOTE(review): size is presumably incremented here as well - confirm. */
324 inline void pushNewFrame(const uschar* instructionPtr, const UChar* subpatternStart, ReturnLocation returnLocation)
326 MatchFrame* newframe = allocateNextFrame();
327 newframe->previousFrame = currentFrame;
329 newframe->args.subjectPtr = currentFrame->args.subjectPtr;
330 newframe->args.offset_top = currentFrame->args.offset_top;
331 newframe->args.instructionPtr = instructionPtr;
332 newframe->args.subpatternStart = subpatternStart;
333 newframe->returnLocation = returnLocation;
336 currentFrame = newframe;
/* Makes the previous frame current again.
   NOTE(review): the size > FRAMES_ON_STACK test presumably guards releasing
   oldFrame when it was heap-allocated, followed by a size decrement -
   confirm. */
339 inline void popCurrentFrame()
341 MatchFrame* oldFrame = currentFrame;
342 currentFrame = currentFrame->previousFrame;
343 if (size > FRAMES_ON_STACK)
/* Error exit used by CHECK_RECURSION_LIMIT: unwinds every frame with
   stack.popAllFrames() so nothing pushed during the match is left behind.
   NOTE(review): presumably returns errorCode afterwards, since the macro
   returns this function's result from match() - confirm. */
355 static int matchError(int errorCode, MatchStack& stack)
357 stack.popAllFrames();
361 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
362 if there are extra bytes. This is called when we know we are in UTF-8 mode. */
/* Decodes the character at subjectPtr into c without moving the pointer.
   NOTE(review): c is presumably loaded from *subjectPtr and len increased by
   the number of additional bytes; neither statement is evident here -
   confirm. */
364 static inline void getUTF8CharAndIncrementLength(int& c, const uschar* subjectPtr, int& len)
/* Both top bits set: this byte starts a multi-byte sequence. */
367 if ((c & 0xc0) == 0xc0) {
368 int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
/* Keep only the bits selected by _pcre_utf8_table3[gcaa] and shift them left
   by gcss. NOTE(review): gcss is presumably 6 * gcaa - confirm. */
370 c = (c & _pcre_utf8_table3[gcaa]) << gcss;
/* Merge in the low six bits of each of the gcaa following bytes. */
371 for (int gcii = 1; gcii <= gcaa; gcii++) {
373 c |= (subjectPtr[gcii] & 0x3f) << gcss;
/* Marks the given frame as the start of a bracketed group by copying its
   args.subpatternStart into locals.subpatternStart. match() calls this for
   its first frame, and RECURSIVE_MATCH_STARTNG_NEW_GROUP calls it for every
   frame it pushes. */
379 static inline void startNewGroup(MatchFrame* currentFrame)
381 /* At the start of a bracketed group, add the current subject pointer to the
382 stack of such pointers, to be re-instated at the end of the group when we hit
383 the closing ket. When match() is called in other circumstances, we don't add to
386 currentFrame->locals.subpatternStart = currentFrame->args.subpatternStart;
// Translates a repeat opcode, expressed as its distance from the first opcode
// of its family, into the three repeat parameters.
//   instructionOffset  opcode minus the family's base opcode; must lie in
//                      0 .. (OP_CRMINQUERY - OP_CRSTAR)
//   minimize           out: true for odd offsets (the non-greedy variants)
//   minimumRepeats     out: fewest repetitions allowed
//   maximumRepeats     out: most repetitions allowed (INT_MAX = unbounded)
389 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
390 static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
392 // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
// Table rows by offset: 0-1 => {0, INT_MAX}, 2-3 => {1, INT_MAX}, 4-5 => {0, 1}.
// NOTE(review): presumably the star, plus and query quantifiers in that order - confirm.
393 static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
394 static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
// Guard the table lookups below against an out-of-range offset.
396 ASSERT(instructionOffset >= 0);
397 ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
399 minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
400 minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
401 maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
404 static int match(const UChar* subjectPtr, const uschar* instructionPtr, int offset_top, MatchData& md)
406 int is_match = false;
408 bool minimize = false; /* Initialization not really needed, but some compilers think so. */
412 /* The opcode jump table. */
413 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
414 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
415 static void* opcode_jump_table[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
416 #undef EMIT_JUMP_TABLE_ENTRY
419 /* One-time setup of the opcode jump table. */
420 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
421 for (int i = 255; !opcode_jump_table[i]; i--)
422 opcode_jump_table[i] = &&CAPTURING_BRACKET;
425 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
426 // Shark shows this as a hot line
427 // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
428 stack.currentFrame->returnLocation = &&RETURN;
430 stack.currentFrame->returnLocation = 0;
432 stack.currentFrame->args.subjectPtr = subjectPtr;
433 stack.currentFrame->args.instructionPtr = instructionPtr;
434 stack.currentFrame->args.offset_top = offset_top;
435 stack.currentFrame->args.subpatternStart = 0;
436 startNewGroup(stack.currentFrame);
438 /* This is where control jumps back to in order to effect "recursion" */
442 /* Now start processing the operations. */
444 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
449 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
450 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
451 #define NEXT_OPCODE goto *opcode_jump_table[*stack.currentFrame->args.instructionPtr]
453 #define BEGIN_OPCODE(opcode) case OP_##opcode
454 #define NEXT_OPCODE continue
457 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
460 switch (*stack.currentFrame->args.instructionPtr)
463 /* Non-capturing bracket: optimized */
466 NON_CAPTURING_BRACKET:
467 DPRINTF(("start bracket 0\n"));
469 RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
472 stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
473 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
474 DPRINTF(("bracket 0 failed\n"));
477 /* Skip over large extraction number data if encountered. */
479 BEGIN_OPCODE(BRANUMBER):
480 stack.currentFrame->args.instructionPtr += 3;
483 /* End of the pattern. */
486 md.end_match_ptr = stack.currentFrame->args.subjectPtr; /* Record where we ended */
487 md.end_offset_top = stack.currentFrame->args.offset_top; /* and how many extracts were taken */
491 /* Assertion brackets. Check the alternative branches in turn - the
492 matching won't pass the KET for an assertion. If any one branch matches,
493 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
494 start of each branch to move the current point backwards, so the code at
495 this level is identical to the lookahead case. */
497 BEGIN_OPCODE(ASSERT):
499 RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
502 stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
503 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
504 if (*stack.currentFrame->args.instructionPtr == OP_KET)
507 /* Continue from after the assertion, updating the offsets high water
508 mark, since extracts may have been taken during the assertion. */
510 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
511 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
512 stack.currentFrame->args.offset_top = md.end_offset_top;
515 /* Negative assertion: all branches must fail to match */
517 BEGIN_OPCODE(ASSERT_NOT):
519 RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
522 stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
523 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
525 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
528 /* "Once" brackets are like assertion brackets except that after a match,
529 the point in the subject string is not moved back. Thus there can never be
530 a move back into the brackets. Friedl calls these "atomic" subpatterns.
531 Check the alternative branches in turn - the matching won't pass the KET
532 for this kind of subpattern. If any one branch matches, we carry on as at
533 the end of a normal bracket, leaving the subject pointer. */
536 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr;
537 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
540 RECURSIVE_MATCH_STARTNG_NEW_GROUP(9, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
543 stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
544 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
546 /* If hit the end of the group (which could be repeated), fail */
548 if (*stack.currentFrame->args.instructionPtr != OP_ONCE && *stack.currentFrame->args.instructionPtr != OP_ALT)
551 /* Continue as from after the assertion, updating the offsets high water
552 mark, since extracts may have been taken. */
554 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
556 stack.currentFrame->args.offset_top = md.end_offset_top;
557 stack.currentFrame->args.subjectPtr = md.end_match_ptr;
559 /* For a non-repeating ket, just continue at this level. This also
560 happens for a repeating ket if no characters were matched in the group.
561 This is the forcible breaking of infinite loops as implemented in Perl
562 5.005. If there is an options reset, it will get obeyed in the normal
565 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
566 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
570 /* The repeating kets try the rest of the pattern or restart from the
571 preceding bracket, in the appropriate order. We need to reset any options
572 that changed within the bracket before re-running it, so check the next
575 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
576 RECURSIVE_MATCH(10, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
579 RECURSIVE_MATCH_STARTNG_NEW_GROUP(11, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
582 } else { /* OP_KETRMAX */
583 RECURSIVE_MATCH_STARTNG_NEW_GROUP(12, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
586 RECURSIVE_MATCH(13, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
592 /* An alternation is the end of a branch; scan along to find the end of the
593 bracketed group and go to there. */
596 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.instructionPtr);
599 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
600 that it may occur zero times. It may repeat infinitely, or not at all -
601 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
602 repeat limits are compiled as a number of copies, with the optional ones
603 preceded by BRAZERO or BRAMINZERO. */
605 BEGIN_OPCODE(BRAZERO): {
606 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
607 RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.subpatternStart);
610 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->locals.startOfRepeatingBracket);
611 stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
615 BEGIN_OPCODE(BRAMINZERO): {
616 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
617 moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->locals.startOfRepeatingBracket);
618 RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
621 stack.currentFrame->args.instructionPtr++;
625 /* End of a group, repeated or non-repeating. If we are at the end of
626 an assertion "group", stop matching and return MATCH_MATCH, but record the
627 current high water mark for use by positive assertions. Do this also
628 for the "once" (non-backtracking) groups. */
631 BEGIN_OPCODE(KETRMIN):
632 BEGIN_OPCODE(KETRMAX):
633 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
634 stack.currentFrame->args.subpatternStart = stack.currentFrame->locals.subpatternStart;
635 stack.currentFrame->locals.subpatternStart = stack.currentFrame->previousFrame->args.subpatternStart;
637 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ONCE) {
638 md.end_match_ptr = stack.currentFrame->args.subjectPtr; /* For ONCE */
639 md.end_offset_top = stack.currentFrame->args.offset_top;
644 /* In all other cases except a conditional group we have to check the
645 group number back at the start and if necessary complete handling an
646 extraction by setting the offsets and bumping the high water mark. */
648 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
650 /* For extended extraction brackets (large number), we have to fish out
651 the number from a dummy opcode at the start. */
653 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
654 stack.currentFrame->locals.number = get2ByteOpcodeValueAtOffset(stack.currentFrame->locals.instructionPtrAtStartOfOnce, 2+LINK_SIZE);
655 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
658 printf("end bracket %d", stack.currentFrame->locals.number);
662 /* Test for a numbered group. This includes groups called as a result
663 of recursion. Note that whole-pattern recursion is coded as a recurse
664 into group 0, so it won't be picked up here. Instead, we catch it when
665 the OP_END is reached. */
667 if (stack.currentFrame->locals.number > 0) {
668 if (stack.currentFrame->locals.offset >= md.offset_max)
669 md.offset_overflow = true;
671 md.offset_vector[stack.currentFrame->locals.offset] =
672 md.offset_vector[md.offset_end - stack.currentFrame->locals.number];
673 md.offset_vector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.start_subject;
674 if (stack.currentFrame->args.offset_top <= stack.currentFrame->locals.offset)
675 stack.currentFrame->args.offset_top = stack.currentFrame->locals.offset + 2;
679 /* For a non-repeating ket, just continue at this level. This also
680 happens for a repeating ket if no characters were matched in the group.
681 This is the forcible breaking of infinite loops as implemented in Perl
682 5.005. If there is an options reset, it will get obeyed in the normal
685 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
686 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
690 /* The repeating kets try the rest of the pattern or restart from the
691 preceding bracket, in the appropriate order. */
693 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
694 RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
697 RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
700 } else { /* OP_KETRMAX */
701 RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.subpatternStart);
704 RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
710 /* Start of subject, or after internal newline if multiline. */
713 if (stack.currentFrame->args.subjectPtr != md.start_subject && (!md.multiline || !isNewline(stack.currentFrame->args.subjectPtr[-1])))
715 stack.currentFrame->args.instructionPtr++;
718 /* End of subject, or before internal newline if multiline. */
721 if (stack.currentFrame->args.subjectPtr < md.end_subject && (!md.multiline || !isNewline(*stack.currentFrame->args.subjectPtr)))
723 stack.currentFrame->args.instructionPtr++;
726 /* Word boundary assertions */
728 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
729 BEGIN_OPCODE(WORD_BOUNDARY): {
730 bool currentCharIsWordChar = false;
731 bool previousCharIsWordChar = false;
733 if (stack.currentFrame->args.subjectPtr > md.start_subject)
734 previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
735 if (stack.currentFrame->args.subjectPtr < md.end_subject)
736 currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
738 /* Now see if the situation is what we want */
739 bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
740 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
745 /* Match a single character type; inline for speed */
747 BEGIN_OPCODE(NOT_NEWLINE):
748 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
750 if (isNewline(*stack.currentFrame->args.subjectPtr++))
752 stack.currentFrame->args.instructionPtr++;
755 BEGIN_OPCODE(NOT_DIGIT):
756 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
758 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
760 stack.currentFrame->args.instructionPtr++;
764 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
766 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
768 stack.currentFrame->args.instructionPtr++;
771 BEGIN_OPCODE(NOT_WHITESPACE):
772 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
774 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
776 stack.currentFrame->args.instructionPtr++;
779 BEGIN_OPCODE(WHITESPACE):
780 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
782 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
784 stack.currentFrame->args.instructionPtr++;
787 BEGIN_OPCODE(NOT_WORDCHAR):
788 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
790 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
792 stack.currentFrame->args.instructionPtr++;
795 BEGIN_OPCODE(WORDCHAR):
796 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
798 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
800 stack.currentFrame->args.instructionPtr++;
803 /* Match a back reference, possibly repeatedly. Look past the end of the
804 item to see if there is repeat information following. The code is similar
805 to that for character classes, but repeated for efficiency. Then obey
806 similar code to character type repeats - written out again for speed.
807 However, if the referenced string is the empty string, always treat
808 it as matched, any number of times (otherwise there could be infinite
812 stack.currentFrame->locals.offset = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1) << 1; /* Doubled ref number */
813 stack.currentFrame->args.instructionPtr += 3; /* Advance past item */
815 /* If the reference is unset, set the length to zero so that the reference
816 is treated as matching the empty string (see the zero-length check below). We
817 can't just fail here, because of the possibility of quantifiers with zero
820 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offset_top || md.offset_vector[stack.currentFrame->locals.offset] < 0)
821 stack.currentFrame->locals.length = 0;
823 stack.currentFrame->locals.length = md.offset_vector[stack.currentFrame->locals.offset+1] - md.offset_vector[stack.currentFrame->locals.offset];
825 /* Set up for repetition, or handle the non-repeated case */
827 switch (*stack.currentFrame->args.instructionPtr) {
834 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
839 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
840 min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
841 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
842 if (stack.currentFrame->locals.max == 0)
843 stack.currentFrame->locals.max = INT_MAX;
844 stack.currentFrame->args.instructionPtr += 5;
847 default: /* No repeat follows */
848 if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
850 stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
854 /* If the length of the reference is zero, just continue with the
857 if (stack.currentFrame->locals.length == 0)
860 /* First, ensure the minimum number of matches are present. */
862 for (int i = 1; i <= min; i++) {
863 if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
865 stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
868 /* If min = max, continue at the same level without recursion.
869 They are not both allowed to be zero. */
871 if (min == stack.currentFrame->locals.max)
874 /* If minimizing, keep trying and advancing the pointer */
877 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
878 RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
881 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
883 stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
885 /* Control never reaches here */
888 /* If maximizing, find the longest string and work backwards */
891 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
892 for (int i = min; i < stack.currentFrame->locals.max; i++) {
893 if (!match_ref(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
895 stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
897 while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
898 RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
901 stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
905 /* Control never reaches here */
907 /* Match a bit-mapped character class, possibly repeatedly. This op code is
908 used when all the characters in the class have values in the range 0-255,
909 and either the matching is caseful, or the characters are in the range
910 0-127 when UTF-8 processing is enabled. The only difference between
911 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
914 First, look past the end of the item to see if there is repeat information
915 following. Then obey similar code to character type repeats - written out
918 BEGIN_OPCODE(NCLASS):
920 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1; /* Save for matching */
921 stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */
923 switch (*stack.currentFrame->args.instructionPtr) {
930 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
935 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
936 min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
937 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
938 if (stack.currentFrame->locals.max == 0)
939 stack.currentFrame->locals.max = INT_MAX;
940 stack.currentFrame->args.instructionPtr += 5;
943 default: /* No repeat follows */
944 min = stack.currentFrame->locals.max = 1;
948 /* First, ensure the minimum number of matches are present. */
950 for (int i = 1; i <= min; i++) {
951 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
953 int c = *stack.currentFrame->args.subjectPtr++;
955 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
958 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
963 /* If max == min we can continue with the main loop without the
966 if (min == stack.currentFrame->locals.max)
969 /* If minimizing, keep testing the rest of the expression and advancing
970 the pointer while it matches the class. */
972 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
973 RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
976 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
978 int c = *stack.currentFrame->args.subjectPtr++;
980 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
983 if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
987 /* Control never reaches here */
989 /* If maximizing, find the longest possible run, then work backwards. */
991 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
993 for (int i = min; i < stack.currentFrame->locals.max; i++) {
994 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
996 int c = *stack.currentFrame->args.subjectPtr;
998 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1001 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
1004 ++stack.currentFrame->args.subjectPtr;
1007 RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1010 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1011 break; /* Stop if tried at original pos */
1016 /* Control never reaches here */
1018 /* Match an extended character class. */
1020 BEGIN_OPCODE(XCLASS):
1021 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE; /* Save for matching */
1022 stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1); /* Advance past the item */
1024 switch (*stack.currentFrame->args.instructionPtr) {
1031 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
1036 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
1037 min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1038 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 3);
1039 if (stack.currentFrame->locals.max == 0)
1040 stack.currentFrame->locals.max = INT_MAX;
1041 stack.currentFrame->args.instructionPtr += 5;
1044 default: /* No repeat follows */
1045 min = stack.currentFrame->locals.max = 1;
1048 /* First, ensure the minimum number of matches are present. */
1050 for (int i = 1; i <= min; i++) {
1051 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1053 int c = *stack.currentFrame->args.subjectPtr++;
1054 if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1058 /* If max == min we can continue with the main loop without the
1061 if (min == stack.currentFrame->locals.max)
1064 /* If minimizing, keep testing the rest of the expression and advancing
1065 the pointer while it matches the class. */
1068 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1069 RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1072 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1074 int c = *stack.currentFrame->args.subjectPtr++;
1075 if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1078 /* Control never reaches here */
1081 /* If maximizing, find the longest possible run, then work backwards. */
1084 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1085 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1086 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1088 int c = *stack.currentFrame->args.subjectPtr;
1089 if (!_pcre_xclass(c, stack.currentFrame->locals.data))
1091 ++stack.currentFrame->args.subjectPtr;
1094 RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1097 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1098 break; /* Stop if tried at original pos */
1103 /* Control never reaches here */
1105 /* Match a single character, casefully */
1108 stack.currentFrame->locals.length = 1;
1109 stack.currentFrame->args.instructionPtr++;
1110 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1111 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1112 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1114 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
1118 /* Match a single character, caselessly */
1120 BEGIN_OPCODE(CHAR_IGNORING_CASE): {
1121 stack.currentFrame->locals.length = 1;
1122 stack.currentFrame->args.instructionPtr++;
1123 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1124 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1125 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1127 int dc = *stack.currentFrame->args.subjectPtr++;
1128 if (stack.currentFrame->locals.fc != dc && _pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
1133 /* Match a single ASCII character. */
1135 BEGIN_OPCODE(ASCII_CHAR):
1136 if (md.end_subject == stack.currentFrame->args.subjectPtr)
1138 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
1140 ++stack.currentFrame->args.subjectPtr;
1141 stack.currentFrame->args.instructionPtr += 2;
1144 /* Match one of two cases of an ASCII letter. */
1146 BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
1147 if (md.end_subject == stack.currentFrame->args.subjectPtr)
1149 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
1151 ++stack.currentFrame->args.subjectPtr;
1152 stack.currentFrame->args.instructionPtr += 2;
1155 /* Match a single character repeatedly; different opcodes share code. */
1157 BEGIN_OPCODE(EXACT):
1158 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1160 stack.currentFrame->args.instructionPtr += 3;
1164 BEGIN_OPCODE(MINUPTO):
1166 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1167 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
1168 stack.currentFrame->args.instructionPtr += 3;
1172 BEGIN_OPCODE(MINSTAR):
1174 BEGIN_OPCODE(MINPLUS):
1175 BEGIN_OPCODE(QUERY):
1176 BEGIN_OPCODE(MINQUERY):
1177 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
1179 /* Common code for all repeated single-character matches. We can give
1180 up quickly if there are fewer than the minimum number of characters left in
1185 stack.currentFrame->locals.length = 1;
1186 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1187 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.end_subject - stack.currentFrame->args.subjectPtr)
1189 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1191 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1192 int othercase = md.ignoreCase ? _pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
1194 for (int i = 1; i <= min; i++) {
1195 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1197 ++stack.currentFrame->args.subjectPtr;
1200 if (min == stack.currentFrame->locals.max)
1204 stack.currentFrame->locals.repeat_othercase = othercase;
1205 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1206 RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1209 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1211 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeat_othercase)
1213 ++stack.currentFrame->args.subjectPtr;
1215 /* Control never reaches here */
1217 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1218 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1219 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1221 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1223 ++stack.currentFrame->args.subjectPtr;
1225 while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1226 RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1229 --stack.currentFrame->args.subjectPtr;
1233 /* Control never reaches here */
1235 /* No case on surrogate pairs, so no need to bother with "othercase". */
1237 for (int i = 1; i <= min; i++) {
1238 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1240 stack.currentFrame->args.subjectPtr += 2;
1243 if (min == stack.currentFrame->locals.max)
1247 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1248 RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1251 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1253 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1255 stack.currentFrame->args.subjectPtr += 2;
1257 /* Control never reaches here */
1259 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1260 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1261 if (stack.currentFrame->args.subjectPtr > md.end_subject - 2)
1263 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1265 stack.currentFrame->args.subjectPtr += 2;
1267 while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1268 RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1271 stack.currentFrame->args.subjectPtr -= 2;
1275 /* Control never reaches here */
1277 /* Control never reaches here */
1279 /* Match a negated single one-byte character. */
1281 BEGIN_OPCODE(NOT): {
1282 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1284 stack.currentFrame->args.instructionPtr++;
1285 int c = *stack.currentFrame->args.subjectPtr++;
1286 if (md.ignoreCase) {
1289 if (toLowerCase(*stack.currentFrame->args.instructionPtr++) == c)
1292 if (*stack.currentFrame->args.instructionPtr++ == c)
1298 /* Match a negated single one-byte character repeatedly. This is almost a
1299 repeat of the code for a repeated single character, but I haven't found a
1300 nice way of commoning these up that doesn't require a test of the
1301 positive/negative option for each character match. Maybe that wouldn't add
1302 very much to the time taken, but character matching *is* what this is all
1305 BEGIN_OPCODE(NOTEXACT):
1306 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1308 stack.currentFrame->args.instructionPtr += 3;
1311 BEGIN_OPCODE(NOTUPTO):
1312 BEGIN_OPCODE(NOTMINUPTO):
1314 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1315 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
1316 stack.currentFrame->args.instructionPtr += 3;
1319 BEGIN_OPCODE(NOTSTAR):
1320 BEGIN_OPCODE(NOTMINSTAR):
1321 BEGIN_OPCODE(NOTPLUS):
1322 BEGIN_OPCODE(NOTMINPLUS):
1323 BEGIN_OPCODE(NOTQUERY):
1324 BEGIN_OPCODE(NOTMINQUERY):
1325 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
1327 /* Common code for all repeated single-byte matches. We can give up quickly
1328 if there are fewer than the minimum number of bytes left in the
1332 if (min > md.end_subject - stack.currentFrame->args.subjectPtr)
1334 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
1336 /* The code is duplicated for the caseless and caseful cases, for speed,
1337 since matching characters is likely to be quite common. First, ensure the
1338 minimum number of matches are present. If min = max, continue at the same
1339 level without recursing. Otherwise, if minimizing, keep trying the rest of
1340 the expression and advancing one matching character if failing, up to the
1341 maximum. Alternatively, if maximizing, find the maximum number of
1342 characters and work backwards. */
1344 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
1346 if (md.ignoreCase) {
1347 if (stack.currentFrame->locals.fc < 128)
1348 stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
1350 for (int i = 1; i <= min; i++) {
1351 int d = *stack.currentFrame->args.subjectPtr++;
1354 if (stack.currentFrame->locals.fc == d)
1358 if (min == stack.currentFrame->locals.max)
1362 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1363 RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1366 int d = *stack.currentFrame->args.subjectPtr++;
1369 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject || stack.currentFrame->locals.fc == d)
1372 /* Control never reaches here */
1378 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1380 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1381 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1383 int d = *stack.currentFrame->args.subjectPtr;
1386 if (stack.currentFrame->locals.fc == d)
1388 ++stack.currentFrame->args.subjectPtr;
1391 RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1394 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1395 break; /* Stop if tried at original pos */
1400 /* Control never reaches here */
1403 /* Caseful comparisons */
1406 for (int i = 1; i <= min; i++) {
1407 int d = *stack.currentFrame->args.subjectPtr++;
1408 if (stack.currentFrame->locals.fc == d)
1412 if (min == stack.currentFrame->locals.max)
1416 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1417 RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1420 int d = *stack.currentFrame->args.subjectPtr++;
1421 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject || stack.currentFrame->locals.fc == d)
1424 /* Control never reaches here */
1430 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1432 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1433 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1435 int d = *stack.currentFrame->args.subjectPtr;
1436 if (stack.currentFrame->locals.fc == d)
1438 ++stack.currentFrame->args.subjectPtr;
1441 RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1444 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1445 break; /* Stop if tried at original pos */
1451 /* Control never reaches here */
1453 /* Match a single character type repeatedly; several different opcodes
1454 share code. This is very similar to the code for single characters, but we
1455 repeat it in the interests of efficiency. */
1457 BEGIN_OPCODE(TYPEEXACT):
1458 min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1460 stack.currentFrame->args.instructionPtr += 3;
1463 BEGIN_OPCODE(TYPEUPTO):
1464 BEGIN_OPCODE(TYPEMINUPTO):
1466 stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1467 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
1468 stack.currentFrame->args.instructionPtr += 3;
1471 BEGIN_OPCODE(TYPESTAR):
1472 BEGIN_OPCODE(TYPEMINSTAR):
1473 BEGIN_OPCODE(TYPEPLUS):
1474 BEGIN_OPCODE(TYPEMINPLUS):
1475 BEGIN_OPCODE(TYPEQUERY):
1476 BEGIN_OPCODE(TYPEMINQUERY):
1477 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
1479 /* Common code for all repeated single character type matches. Note that
1480 in UTF-8 mode, '.' matches a character of any length, but for the other
1481 character types, the valid characters are all one-byte long. */
1484 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++; /* Code for the character type */
1486 /* First, ensure the minimum number of matches are present. Use inline
1487 code for maximizing the speed, and do the type test once at the start
1488 (i.e. keep it out of the loop). Also we can test that there are at least
1489 the minimum number of characters before we start. */
1491 if (min > md.end_subject - stack.currentFrame->args.subjectPtr)
1494 switch (stack.currentFrame->locals.ctype) {
1495 case OP_NOT_NEWLINE:
1496 for (int i = 1; i <= min; i++) {
1497 if (isNewline(*stack.currentFrame->args.subjectPtr))
1499 ++stack.currentFrame->args.subjectPtr;
1504 for (int i = 1; i <= min; i++) {
1505 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1507 ++stack.currentFrame->args.subjectPtr;
1512 for (int i = 1; i <= min; i++) {
1513 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1515 ++stack.currentFrame->args.subjectPtr;
1519 case OP_NOT_WHITESPACE:
1520 for (int i = 1; i <= min; i++) {
1521 if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
1523 ++stack.currentFrame->args.subjectPtr;
1528 for (int i = 1; i <= min; i++) {
1529 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
1531 ++stack.currentFrame->args.subjectPtr;
1535 case OP_NOT_WORDCHAR:
1536 for (int i = 1; i <= min; i++) {
1537 if (isWordChar(*stack.currentFrame->args.subjectPtr))
1539 ++stack.currentFrame->args.subjectPtr;
1544 for (int i = 1; i <= min; i++) {
1545 if (!isWordChar(*stack.currentFrame->args.subjectPtr))
1547 ++stack.currentFrame->args.subjectPtr;
1552 ASSERT_NOT_REACHED();
1553 return matchError(JSRegExpErrorInternal, stack);
1554 } /* End switch(stack.currentFrame->locals.ctype) */
1557 /* If min = max, continue at the same level without recursing */
1559 if (min == stack.currentFrame->locals.max)
1562 /* If minimizing, we have to test the rest of the pattern before each
1563 subsequent match. */
1566 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1567 RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1570 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.end_subject)
1573 int c = *stack.currentFrame->args.subjectPtr++;
1574 switch (stack.currentFrame->locals.ctype) {
1575 case OP_NOT_NEWLINE:
1581 if (isASCIIDigit(c))
1586 if (!isASCIIDigit(c))
1590 case OP_NOT_WHITESPACE:
1596 if (!isSpaceChar(c))
1600 case OP_NOT_WORDCHAR:
1611 ASSERT_NOT_REACHED();
1612 return matchError(JSRegExpErrorInternal, stack);
1615 /* Control never reaches here */
1618 /* If maximizing it is worth using inline code for speed, doing the type
1619 test once at the start (i.e. keep it out of the loop). */
1622 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */
1624 switch (stack.currentFrame->locals.ctype) {
1625 case OP_NOT_NEWLINE:
1626 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1627 if (stack.currentFrame->args.subjectPtr >= md.end_subject || isNewline(*stack.currentFrame->args.subjectPtr))
1629 stack.currentFrame->args.subjectPtr++;
1634 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1635 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1637 int c = *stack.currentFrame->args.subjectPtr;
1638 if (isASCIIDigit(c))
1640 ++stack.currentFrame->args.subjectPtr;
1645 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1646 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1648 int c = *stack.currentFrame->args.subjectPtr;
1649 if (!isASCIIDigit(c))
1651 ++stack.currentFrame->args.subjectPtr;
1655 case OP_NOT_WHITESPACE:
1656 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1657 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1659 int c = *stack.currentFrame->args.subjectPtr;
1662 ++stack.currentFrame->args.subjectPtr;
1667 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1668 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1670 int c = *stack.currentFrame->args.subjectPtr;
1671 if (!isSpaceChar(c))
1673 ++stack.currentFrame->args.subjectPtr;
1677 case OP_NOT_WORDCHAR:
1678 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1679 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1681 int c = *stack.currentFrame->args.subjectPtr;
1684 ++stack.currentFrame->args.subjectPtr;
1689 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1690 if (stack.currentFrame->args.subjectPtr >= md.end_subject)
1692 int c = *stack.currentFrame->args.subjectPtr;
1695 ++stack.currentFrame->args.subjectPtr;
1700 ASSERT_NOT_REACHED();
1701 return matchError(JSRegExpErrorInternal, stack);
1704 /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
1707 RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.subpatternStart);
1710 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1711 break; /* Stop if tried at original pos */
1714 /* Get here if we can't make it match with any permitted repetitions */
1718 /* Control never reaches here */
1720 BEGIN_OPCODE(CRMINPLUS):
1721 BEGIN_OPCODE(CRMINQUERY):
1722 BEGIN_OPCODE(CRMINRANGE):
1723 BEGIN_OPCODE(CRMINSTAR):
1724 BEGIN_OPCODE(CRPLUS):
1725 BEGIN_OPCODE(CRQUERY):
1726 BEGIN_OPCODE(CRRANGE):
1727 BEGIN_OPCODE(CRSTAR):
1728 ASSERT_NOT_REACHED();
1729 return matchError(JSRegExpErrorInternal, stack);
1731 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1736 /* Opening capturing bracket. If there is space in the offset vector, save
1737 the current subject position in the working slot at the top of the vector. We
1738 mustn't change the current values of the data slot, because they may be set
1739 from a previous iteration of this group, and be referred to by a reference
1742 If the bracket fails to match, we need to restore this value and also the
1743 values of the final offsets, in case they were set by a previous iteration of
1746 If there isn't enough space in the offset vector, treat this as if it were a
1747 non-capturing bracket. Don't worry about setting the flag for the error case
1748 here; that is handled in the code for KET. */
1750 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1752 stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
1754 /* For extended extraction brackets (large number), we have to fish out the
1755 number from a dummy opcode at the start. */
1757 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1758 stack.currentFrame->locals.number = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 2+LINK_SIZE);
1759 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
1762 printf("start bracket %d subject=", stack.currentFrame->locals.number);
1763 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
1767 if (stack.currentFrame->locals.offset < md.offset_max) {
1768 stack.currentFrame->locals.save_offset1 = md.offset_vector[stack.currentFrame->locals.offset];
1769 stack.currentFrame->locals.save_offset2 = md.offset_vector[stack.currentFrame->locals.offset + 1];
1770 stack.currentFrame->locals.save_offset3 = md.offset_vector[md.offset_end - stack.currentFrame->locals.number];
1772 DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.save_offset1, stack.currentFrame->locals.save_offset2, stack.currentFrame->locals.save_offset3));
1773 md.offset_vector[md.offset_end - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.start_subject;
1776 RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.subpatternStart);
1779 stack.currentFrame->args.instructionPtr += getOpcodeValueAtOffset(stack.currentFrame->args.instructionPtr, 1);
1780 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
1782 DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
1784 md.offset_vector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.save_offset1;
1785 md.offset_vector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.save_offset2;
1786 md.offset_vector[md.offset_end - stack.currentFrame->locals.number] = stack.currentFrame->locals.save_offset3;
1791 /* Insufficient room for saving captured contents */
1793 goto NON_CAPTURING_BRACKET;
1796 /* Do not stick any code in here without much thought; it is assumed
1797 that "continue" in the code above comes out to here to repeat the main
1800 } /* End of main loop */
1802 ASSERT_NOT_REACHED();
1804 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1807 switch (stack.currentFrame->returnLocation) {
1808 case 0: goto RETURN;
1809 case 1: goto RRETURN_1;
1810 case 2: goto RRETURN_2;
1811 case 6: goto RRETURN_6;
1812 case 7: goto RRETURN_7;
1813 case 9: goto RRETURN_9;
1814 case 10: goto RRETURN_10;
1815 case 11: goto RRETURN_11;
1816 case 12: goto RRETURN_12;
1817 case 13: goto RRETURN_13;
1818 case 14: goto RRETURN_14;
1819 case 15: goto RRETURN_15;
1820 case 16: goto RRETURN_16;
1821 case 17: goto RRETURN_17;
1822 case 18: goto RRETURN_18;
1823 case 19: goto RRETURN_19;
1824 case 20: goto RRETURN_20;
1825 case 21: goto RRETURN_21;
1826 case 22: goto RRETURN_22;
1827 case 24: goto RRETURN_24;
1828 case 26: goto RRETURN_26;
1829 case 27: goto RRETURN_27;
1830 case 28: goto RRETURN_28;
1831 case 29: goto RRETURN_29;
1832 case 30: goto RRETURN_30;
1833 case 31: goto RRETURN_31;
1834 case 38: goto RRETURN_38;
1835 case 40: goto RRETURN_40;
1836 case 42: goto RRETURN_42;
1837 case 44: goto RRETURN_44;
1838 case 48: goto RRETURN_48;
1839 case 52: goto RRETURN_52;
1842 ASSERT_NOT_REACHED();
1843 return matchError(JSRegExpErrorInternal, stack);
1848 ASSERT(is_match == MATCH_MATCH || is_match == MATCH_NOMATCH);
1853 /*************************************************
1854 * Execute a Regular Expression *
1855 *************************************************/
1857 /* jsRegExpExecute applies a compiled regular expression to a subject string and picks out
1858 portions of the string if it matches. Two elements in the vector are set for
1859 each substring: the offsets to the start and end of the substring.
1862 re points to the compiled expression
1863 extra_data points to extra data or is NULL
1864 subject points to the subject string
1865 length length of subject string (may contain binary zeros)
1866 start_offset where to start in the subject string
1868 offsets points to a vector of ints to be filled in with offsets
1869 offsetcount the number of elements in the vector
1871 Returns: > 0 => success; value is the number of elements filled in
1872 = 0 => success, but offsets is not big enough
1873 -1 => failed to match
1874 < -1 => some kind of unexpected problem
// tryFirstByteOptimization: moves subjectPtr (passed by reference) forward to
// a more promising position at which to begin a match attempt. Returns nothing;
// the only visible output is the updated subjectPtr.
//
//   subjectPtr            candidate start position; read and advanced in place
//   endSubject            upper bound of every scan; the subject is only read
//                         while subjectPtr is below this pointer
//   first_byte            known first character of any match; used only when
//                         it is >= 0
//   first_byte_caseless   when true, each subject character is passed through
//                         toLowerCase before being compared with first_byte
//                         (presumably first_byte is already lower case -- confirm
//                         against the caller)
//   useMultiLineFirstCharOptimization
//                         consulted only when first_byte is negative; selects
//                         the scan that looks at the preceding character with
//                         isNewline
//   originalSubjectStart  start of the subject; the newline scan runs only when
//                         subjectPtr is strictly beyond it
//
// NOTE(review): the loop bodies and closing braces of this function are not
// visible in this listing, so exactly how each loop advances or exits is
// assumed from the loop conditions -- confirm against the complete source.
1877 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
1879 // If first_byte is set, scan forward to the first instance of that character;
1880 // there is no need to try to match against any earlier part of the subject string.
1881 if (first_byte >= 0) {
// Narrow the int to a UChar once so the comparisons below are UChar against UChar.
1882 UChar first_char = first_byte;
1883 if (first_byte_caseless)
// Caseless scan: compare the lower-cased subject character with first_char.
1884 while (subjectPtr < endSubject) {
1885 int c = *subjectPtr;
1888 if (toLowerCase(c) == first_char)
// Caseful scan: the loop condition holds while the current character differs from first_char.
1893 while (subjectPtr < endSubject && *subjectPtr != first_char)
1896 } else if (useMultiLineFirstCharOptimization) {
1897 /* Or to just after \n for a multiline match if possible */
1898 // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
// NOTE(review): the loop below reads subjectPtr[-1]; this guard presumably keeps
// that read from reaching before the start of the subject -- confirm.
1899 if (subjectPtr > originalSubjectStart) {
1900 while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
// tryRequiredByteOptimization: checks whether a character that every match
// must contain is present in the remaining subject before a match is attempted.
//
//   subjectPtr         current candidate start position (taken by reference;
//                      no write to it is visible in this listing)
//   endSubject         upper bound of the search
//   req_byte           the required character; the check runs only when it is >= 0
//   req_byte2          second accepted value, compared only in the caseless branch
//                      (presumably the other-case form of req_byte -- confirm)
//   req_byte_caseless  selects the caseless search loop over the caseful one
//   hasFirstByte       when true the search starts one character after subjectPtr
//   req_byte_ptr       taken by reference; the search is repeated only when its
//                      starting point is beyond this pointer. Per the closing
//                      comment it is updated to where the character was found,
//                      but that assignment is not visible here -- confirm.
//
// The whole check is bypassed when the remaining subject length
// (endSubject - subjectPtr) is not below REQ_BYTE_MAX.
//
// NOTE(review): the return statements are not visible in this listing. From
// the comment about breaking the matching loop, one return value presumably
// tells the caller to stop trying further start positions -- confirm which.
1906 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByte, const UChar*& req_byte_ptr)
1908 /* If req_byte is set, we know that that character must appear in the subject
1909 for the match to succeed. If the first character is set, req_byte must be
1910 later in the subject; otherwise the test starts at the match point. This
1911 optimization can save a huge amount of backtracking in patterns with nested
1912 unlimited repeats that aren't going to match. Writing separate code for
1913 cased/caseless versions makes it go faster, as does using an autoincrement
1914 and backing off on a match.
1916 HOWEVER: when the subject string is very, very long, searching to its end can
1917 take a long time, and give bad performance on quite ordinary patterns. This
1918 showed up when somebody was matching /^C/ on a 32-megabyte string... so we
1919 don't do this when the string is sufficiently long.
1922 if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
1923 const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
1925 /* We don't need to repeat the search if we haven't yet reached the
1926 place we found it at last time. */
1928 if (p > req_byte_ptr) {
1929 if (req_byte_caseless) {
1930 while (p < endSubject) {
// NOTE(review): the definition of pp is on a line missing from this listing;
// presumably it is the character read at p -- confirm.
1932 if (pp == req_byte || pp == req_byte2) {
1938 while (p < endSubject) {
// The post-increment moves p past the character it has just compared.
1939 if (*p++ == req_byte) {
1946 /* If we can't find the required character, break the matching loop */
1948 if (p >= endSubject)
1951 /* If we have found the required character, save the point where we
1952 found it, so that we don't search again next time round the loop if
1953 the start hasn't passed this character yet. */
1961 int jsRegExpExecute(const JSRegExp* re,
1962 const UChar* subject, int length, int start_offset, int* offsets,
1967 ASSERT(offsetcount >= 0);
1968 ASSERT(offsets || offsetcount == 0);
1970 MatchData match_block;
1971 match_block.start_subject = subject;
1972 match_block.end_subject = match_block.start_subject + length;
1973 const UChar* end_subject = match_block.end_subject;
1975 match_block.multiline = (re->options & MatchAcrossMultipleLinesOption);
1976 match_block.ignoreCase = (re->options & IgnoreCaseOption);
1978 /* If the expression has got more back references than the offsets supplied can
1979 hold, we get a temporary chunk of working store to use during the matching.
1980 Otherwise, we can use the vector supplied, rounding down its size to a multiple
1983 int ocount = offsetcount - (offsetcount % 3);
1985 // FIXME: It is lame that we have to second-guess our caller here.
1986 // The API should change to either fail hard when we don't have enough offset space
1987 // or stop asking our callers to pre-allocate in the first place.
1988 bool using_temporary_offsets = false;
1989 if (re->top_backref > 0 && re->top_backref >= ocount/3) {
1990 ocount = re->top_backref * 3 + 3;
1991 match_block.offset_vector = new int[ocount];
1992 if (!match_block.offset_vector)
1993 return JSRegExpErrorNoMemory;
1994 using_temporary_offsets = true;
1995 ASSERT_NOT_REACHED(); // Fail debug builds -- No one should be hitting this vestigial (slow!) code, see comment above.
1997 match_block.offset_vector = offsets;
1999 match_block.offset_end = ocount;
2000 match_block.offset_max = (2*ocount)/3;
2001 match_block.offset_overflow = false;
2003 /* Compute the minimum number of offsets that we need to reset each time. Doing
2004 this makes a huge difference to execution time when there aren't many brackets
2007 int resetcount = 2 + re->top_bracket * 2;
2008 if (resetcount > offsetcount)
2009 resetcount = ocount;
2011 /* Reset the working variable associated with each extraction. These should
2012 never be used unless previously set, but they get saved and restored, and so we
2013 initialize them to avoid reading uninitialized locations. */
2015 if (match_block.offset_vector) {
2016 int* iptr = match_block.offset_vector + ocount;
2017 int* iend = iptr - resetcount/2 + 1;
2018 while (--iptr >= iend)
2022 /* Set up the first character to match, if available. The first_byte value is
2023 never set for an anchored regular expression, but the anchoring may be forced
2024 at run time, so we have to test for anchoring. The first char may be unset for
2025 an unanchored pattern, of course. If there's no first char and the pattern was
2026 studied, there may be a bitmap of possible first characters. */
2028 bool first_byte_caseless = false;
2029 int first_byte = -1;
2030 if (re->options & UseFirstByteOptimizationOption) {
2031 first_byte = re->first_byte & 255;
2032 if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
2033 first_byte = toLowerCase(first_byte);
2036 /* For anchored or unanchored matches, there may be a "last known required
2039 bool req_byte_caseless = false;
2042 if (re->options & UseRequiredByteOptimizationOption) {
2043 req_byte = re->req_byte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
2044 req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
2045 req_byte2 = flipCase(req_byte);
2048 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
2049 the loop runs just once. */
2051 const UChar* start_match = subject + start_offset;
2052 const UChar* req_byte_ptr = start_match - 1;
2053 bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
2056 /* Reset the maximum number of extractions we might see. */
2057 if (match_block.offset_vector) {
2058 int* iptr = match_block.offset_vector;
2059 int* iend = iptr + resetcount;
2064 tryFirstByteOptimization(start_match, end_subject, first_byte, first_byte_caseless, useMultiLineFirstCharOptimization, match_block.start_subject + start_offset);
2065 if (tryRequiredByteOptimization(start_match, end_subject, req_byte, req_byte2, req_byte_caseless, first_byte >= 0, req_byte_ptr))
2068 /* When a match occurs, substrings will be set for all internal extractions;
2069 we just need to set up the whole thing as substring 0 before returning. If
2070 there were too many extractions, set the return code to zero. In the case
2071 where we had to get some local store to hold offsets for backreferences, copy
2072 those back references that we can. In this case there need not be overflow
2073 if certain parts of the pattern were not used. */
2075 /* The code starts after the JSRegExp block and the capture name table. */
2076 const uschar* start_code = (const uschar*)(re + 1);
2078 int returnCode = match(start_match, start_code, 2, match_block);
2080 /* When the result is no match, if the subject's first character was a
2081 newline and the PCRE_FIRSTLINE option is set, break (which will return
2082 PCRE_ERROR_NOMATCH). The option requests that a match occur before the first
2083 newline in the subject. Otherwise, advance the pointer to the next character
2084 and continue - but the continuation will actually happen only when the
2085 pattern is not anchored. */
2087 if (returnCode == MATCH_NOMATCH) {
2092 if (returnCode != MATCH_MATCH) {
2093 DPRINTF((">>>> error: returning %d\n", rc));
2097 /* We have a match! Copy the offset information from temporary store if
2100 if (using_temporary_offsets) {
2101 if (offsetcount >= 4) {
2102 memcpy(offsets + 2, match_block.offset_vector + 2, (offsetcount - 2) * sizeof(int));
2103 DPRINTF(("Copied offsets from temporary memory\n"));
2105 if (match_block.end_offset_top > offsetcount)
2106 match_block.offset_overflow = true;
2108 DPRINTF(("Freeing temporary memory\n"));
2109 delete [] match_block.offset_vector;
2112 returnCode = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
2114 if (offsetcount < 2)
2117 offsets[0] = start_match - match_block.start_subject;
2118 offsets[1] = match_block.end_match_ptr - match_block.start_subject;
2121 DPRINTF((">>>> returning %d\n", rc));
2123 } while (start_match <= end_subject);
2125 if (using_temporary_offsets) {
2126 DPRINTF(("Freeing temporary memory\n"));
2127 delete [] match_block.offset_vector;
2130 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2131 return JSRegExpErrorNoMatch;