#endif
-#define RMATCH(num,rx,rb,rf,rg)\
+#define RMATCH(num, ra, rb, rc)\
{\
if (frame >= stackframes && frame + 1 < stackframesend)\
newframe = frame + 1;\
newframe = (pcre_malloc)(sizeof(matchframe));\
frame->where = RMATCH_WHERE(num);\
newframe->eptr = frame->eptr;\
- newframe->ecode = rb;\
+ newframe->ecode = (ra);\
newframe->offset_top = frame->offset_top;\
- newframe->eptrb = rf;\
- is_group_start = rg;\
+ newframe->eptrb = (rb);\
+ is_group_start = (rc);\
++rdepth;\
newframe->prevframe = frame;\
frame = newframe;\
DPRINTF(("restarting from line %d\n", __LINE__));\
goto HEAP_RECURSE;\
RRETURN_##num:\
- DPRINTF(("did a goto back to line %d\n", __LINE__));\
- rx = result;\
--rdepth;\
+ DPRINTF(("did a goto back to line %d\n", __LINE__));\
}
#define RRETURN(ra)\
(pcre_free)(newframe);\
if (frame != NULL)\
{\
- result = ra;\
+ rrc = (ra);\
goto RRETURN_LABEL;\
}\
return ra;\
}
-
/* Structure for remembering the local variables in a private frame */
typedef struct matchframe {
const uschar *prev;
const pcre_uchar *saved_eptr;
- BOOL minimize;
-
int repeat_othercase;
int ctype;
int fi;
int length;
int max;
- int min;
int number;
int offset;
- int op;
int save_offset1, save_offset2, save_offset3;
eptrblock newptrb;
returns a negative (error) response, the outer incarnation must also return the
same response.
-Performance note: It might be tempting to extract commonly used fields from the
-md structure (e.g. utf8, end_subject) into individual variables to improve
-performance. Tests using gcc on a SPARC disproved this; in the first case, it
-made performance worse.
-
Arguments:
eptr pointer in subject
ecode position in code
BOOL cur_is_word;
BOOL prev_is_word;
BOOL is_group_start = TRUE;
+int min;
+BOOL minimize = FALSE; /* Initialization not really needed, but some compilers think so. */
/* When recursion is not being used, all "local" variables that have to be
preserved over calls to RMATCH() are part of a "frame" which is obtained from
matchframe *frame = stackframes;
matchframe *newframe;
-int result;
frame->prevframe = NULL; /* Marks the top level */
/* Copy in the original argument variables */
for (;;)
{
- frame->op = *frame->ecode;
- frame->minimize = FALSE;
-
- /* Opening capturing bracket. If there is space in the offset vector, save
- the current subject position in the working slot at the top of the vector. We
- mustn't change the current values of the data slot, because they may be set
- from a previous iteration of this group, and be referred to by a reference
- inside the group.
-
- If the bracket fails to match, we need to restore this value and also the
- values of the final offsets, in case they were set by a previous iteration of
- the same bracket.
-
- If there isn't enough space in the offset vector, treat this as if it were a
- non-capturing bracket. Don't worry about setting the flag for the error case
- here; that is handled in the code for KET. */
-
- if (frame->op > OP_BRA)
- {
- frame->number = frame->op - OP_BRA;
-
- /* For extended extraction brackets (large number), we have to fish out the
- number from a dummy opcode at the start. */
-
- if (frame->number > EXTRACT_BASIC_MAX)
- frame->number = GET2(frame->ecode, 2+LINK_SIZE);
- frame->offset = frame->number << 1;
-
-#ifdef DEBUG
- printf("start bracket %d subject=", frame->number);
- pchars(frame->eptr, 16, TRUE, md);
- printf("\n");
-#endif
-
- if (frame->offset < md->offset_max)
- {
- frame->save_offset1 = md->offset_vector[frame->offset];
- frame->save_offset2 = md->offset_vector[frame->offset+1];
- frame->save_offset3 = md->offset_vector[md->offset_end - frame->number];
-
- DPRINTF(("saving %d %d %d\n", frame->save_offset1, frame->save_offset2, frame->save_offset3));
- md->offset_vector[md->offset_end - frame->number] = frame->eptr - md->start_subject;
-
- do
- {
- RMATCH(1, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- frame->ecode += GET(frame->ecode, 1);
- }
- while (*frame->ecode == OP_ALT);
-
- DPRINTF(("bracket %d failed\n", frame->number));
-
- md->offset_vector[frame->offset] = frame->save_offset1;
- md->offset_vector[frame->offset+1] = frame->save_offset2;
- md->offset_vector[md->offset_end - frame->number] = frame->save_offset3;
-
- RRETURN(MATCH_NOMATCH);
- }
-
- /* Insufficient room for saving captured contents */
-
- else frame->op = OP_BRA;
- }
-
- /* Other types of node can be handled by a switch */
-
- switch(frame->op)
+ switch (*frame->ecode)
{
case OP_BRA: /* Non-capturing bracket: optimized */
+ NON_CAPTURING_BRACKET:
DPRINTF(("start bracket 0\n"));
do
{
- RMATCH(2, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
+ RMATCH(2, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode += GET(frame->ecode, 1);
}
case OP_ASSERT:
do
{
- RMATCH(6, rrc, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
+ RMATCH(6, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
if (rrc == MATCH_MATCH) break;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode += GET(frame->ecode, 1);
case OP_ASSERT_NOT:
do
{
- RMATCH(7, rrc, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
+ RMATCH(7, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode += GET(frame->ecode,1);
do
{
- RMATCH(9, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
+ RMATCH(9, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
if (rrc == MATCH_MATCH) break;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode += GET(frame->ecode,1);
if (*frame->ecode == OP_KETRMIN)
{
- RMATCH(10, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, 0);
+ RMATCH(10, frame->ecode + 1 + LINK_SIZE, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(11, rrc, frame->prev, frame->eptrb, match_isgroup);
+ RMATCH(11, frame->prev, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
else /* OP_KETRMAX */
{
- RMATCH(12, rrc, frame->prev, frame->eptrb, match_isgroup);
+ RMATCH(12, frame->prev, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(13, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
+ RMATCH(13, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
}
case OP_BRAZERO:
{
frame->next = frame->ecode+1;
- RMATCH(14, rrc, frame->next, frame->eptrb, match_isgroup);
+ RMATCH(14, frame->next, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT);
frame->ecode = frame->next + 1+LINK_SIZE;
{
frame->next = frame->ecode+1;
do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT);
- RMATCH(15, rrc, frame->next + 1+LINK_SIZE, frame->eptrb, match_isgroup);
+ RMATCH(15, frame->next + 1+LINK_SIZE, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode++;
}
if (*frame->ecode == OP_KETRMIN)
{
- RMATCH(16, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
+ RMATCH(16, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(17, rrc, frame->prev, frame->eptrb, match_isgroup);
+ RMATCH(17, frame->prev, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
else /* OP_KETRMAX */
{
- RMATCH(18, rrc, frame->prev, frame->eptrb, match_isgroup);
+ RMATCH(18, frame->prev, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(19, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
+ RMATCH(19, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
}
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *frame->ecode++ - OP_CRSTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
- frame->minimize = (*frame->ecode == OP_CRMINRANGE);
- frame->min = GET2(frame->ecode, 1);
+ minimize = (*frame->ecode == OP_CRMINRANGE);
+ min = GET2(frame->ecode, 1);
frame->max = GET2(frame->ecode, 3);
if (frame->max == 0) frame->max = INT_MAX;
frame->ecode += 5;
/* First, ensure the minimum number of matches are present. */
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN(MATCH_NOMATCH);
frame->eptr += frame->length;
/* If min = max, continue at the same level without recursion.
They are not both allowed to be zero. */
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
/* If minimizing, keep trying and advancing the pointer */
- if (frame->minimize)
+ if (minimize)
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(20, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(20, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || !match_ref(frame->offset, frame->eptr, frame->length, md))
RRETURN(MATCH_NOMATCH);
else
{
frame->pp = frame->eptr;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
if (!match_ref(frame->offset, frame->eptr, frame->length, md)) break;
frame->eptr += frame->length;
}
while (frame->eptr >= frame->pp)
{
- RMATCH(21, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(21, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->eptr -= frame->length;
}
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *frame->ecode++ - OP_CRSTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
- frame->minimize = (*frame->ecode == OP_CRMINRANGE);
- frame->min = GET2(frame->ecode, 1);
+ minimize = (*frame->ecode == OP_CRMINRANGE);
+ min = GET2(frame->ecode, 1);
frame->max = GET2(frame->ecode, 3);
if (frame->max == 0) frame->max = INT_MAX;
frame->ecode += 5;
break;
default: /* No repeat follows */
- frame->min = frame->max = 1;
+ min = frame->max = 1;
break;
}
/* First, ensure the minimum number of matches are present. */
{
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
if (c > 255)
{
- if (frame->op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ if (frame->data[-1] == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
{
/* If max == min we can continue with the main loop without the
need to recurse. */
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
/* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */
- if (frame->minimize)
+ if (minimize)
{
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(22, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(22, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
if (c > 255)
{
- if (frame->op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ if (frame->data[-1] == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
{
frame->pp = frame->eptr;
{
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
GETCHARLEN(c, frame->eptr, len);
if (c > 255)
{
- if (frame->op == OP_CLASS) break;
+ if (frame->data[-1] == OP_CLASS) break;
}
else
{
}
for (;;)
{
- RMATCH(24, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(24, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr);
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *frame->ecode++ - OP_CRSTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
- frame->minimize = (*frame->ecode == OP_CRMINRANGE);
- frame->min = GET2(frame->ecode, 1);
+ minimize = (*frame->ecode == OP_CRMINRANGE);
+ min = GET2(frame->ecode, 1);
frame->max = GET2(frame->ecode, 3);
if (frame->max == 0) frame->max = INT_MAX;
frame->ecode += 5;
break;
default: /* No repeat follows */
- frame->min = frame->max = 1;
+ min = frame->max = 1;
break;
}
/* First, ensure the minimum number of matches are present. */
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
/* If max == min we can continue with the main loop without the
need to recurse. */
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
/* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */
- if (frame->minimize)
+ if (minimize)
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(26, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(26, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
else
{
frame->pp = frame->eptr;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
}
for(;;)
{
- RMATCH(27, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(27, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr)
}
break;
+ /* Match a single ASCII character. */
+
+ case OP_ASCII_CHAR:
+ if (md->end_subject == frame->eptr)
+ RRETURN(MATCH_NOMATCH);
+ if (*frame->eptr != frame->ecode[1])
+ RRETURN(MATCH_NOMATCH);
+ ++frame->eptr;
+ frame->ecode += 2;
+ break;
+
+ /* Match an ASCII letter caselessly: the subject character is folded with | 0x20 before comparing it to the operand. */
+
+ case OP_ASCII_LETTER_NC:
+ if (md->end_subject == frame->eptr)
+ RRETURN(MATCH_NOMATCH);
+ if ((*frame->eptr | 0x20) != frame->ecode[1])
+ RRETURN(MATCH_NOMATCH);
+ ++frame->eptr;
+ frame->ecode += 2;
+ break;
+
/* Match a single character repeatedly; different opcodes share code. */
case OP_EXACT:
- frame->min = frame->max = GET2(frame->ecode, 1);
+ min = frame->max = GET2(frame->ecode, 1);
+ minimize = FALSE;
frame->ecode += 3;
goto REPEATCHAR;
case OP_UPTO:
case OP_MINUPTO:
- frame->min = 0;
+ min = 0;
frame->max = GET2(frame->ecode, 1);
- frame->minimize = *frame->ecode == OP_MINUPTO;
+ minimize = *frame->ecode == OP_MINUPTO;
frame->ecode += 3;
goto REPEATCHAR;
case OP_QUERY:
case OP_MINQUERY:
c = *frame->ecode++ - OP_STAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
frame->length = 1;
GETUTF8CHARLEN(frame->fc, frame->ecode, frame->length);
{
- if (frame->min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
+ if (min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
frame->ecode += frame->length;
if (frame->fc <= 0xFFFF)
{
int othercase = md->caseless ? _pcre_ucp_othercase(frame->fc) : -1;
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (*frame->eptr != frame->fc && *frame->eptr != othercase) RRETURN(MATCH_NOMATCH);
++frame->eptr;
}
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
- if (frame->minimize)
+ if (minimize)
{
frame->repeat_othercase = othercase;
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(28, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(28, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
if (*frame->eptr != frame->fc && *frame->eptr != frame->repeat_othercase) RRETURN(MATCH_NOMATCH);
else
{
frame->pp = frame->eptr;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
if (frame->eptr >= md->end_subject) break;
if (*frame->eptr != frame->fc && *frame->eptr != othercase) break;
}
while (frame->eptr >= frame->pp)
{
- RMATCH(29, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(29, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
--frame->eptr;
}
{
/* No case on surrogate pairs, so no need to bother with "othercase". */
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
int nc;
GETCHAR(nc, frame->eptr);
frame->eptr += 2;
}
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
- if (frame->minimize)
+ if (minimize)
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
int nc;
- RMATCH(30, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(30, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHAR(nc, frame->eptr);
else
{
frame->pp = frame->eptr;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int nc;
if (frame->eptr > md->end_subject - 2) break;
}
while (frame->eptr >= frame->pp)
{
- RMATCH(31, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(31, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->eptr -= 2;
}
about... */
case OP_NOTEXACT:
- frame->min = frame->max = GET2(frame->ecode, 1);
+ min = frame->max = GET2(frame->ecode, 1);
+ minimize = FALSE;
frame->ecode += 3;
goto REPEATNOTCHAR;
case OP_NOTUPTO:
case OP_NOTMINUPTO:
- frame->min = 0;
+ min = 0;
frame->max = GET2(frame->ecode, 1);
- frame->minimize = *frame->ecode == OP_NOTMINUPTO;
+ minimize = *frame->ecode == OP_NOTMINUPTO;
frame->ecode += 3;
goto REPEATNOTCHAR;
case OP_NOTQUERY:
case OP_NOTMINQUERY:
c = *frame->ecode++ - OP_NOTSTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
subject. */
REPEATNOTCHAR:
- if (frame->min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
+ if (min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
frame->fc = *frame->ecode++;
/* The code is duplicated for the caseless and caseful cases, for speed,
maximum. Alternatively, if maximizing, find the maximum number of
characters and work backwards. */
- DPRINTF(("negative matching %c{%d,%d}\n", frame->fc, frame->min, frame->max));
+ DPRINTF(("negative matching %c{%d,%d}\n", frame->fc, min, frame->max));
if (md->caseless)
{
{
register int d;
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
GETCHARINC(d, frame->eptr);
if (d < 128) d = md->lcc[d];
}
}
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
- if (frame->minimize)
+ if (minimize)
{
{
register int d;
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(38, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(38, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
GETCHARINC(d, frame->eptr);
if (d < 128) d = md->lcc[d];
{
register int d;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
}
for(;;)
{
- RMATCH(40, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(40, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr);
{
{
register int d;
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
GETCHARINC(d, frame->eptr);
if (frame->fc == d) RRETURN(MATCH_NOMATCH);
}
}
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
- if (frame->minimize)
+ if (minimize)
{
{
register int d;
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(42, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(42, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
GETCHARINC(d, frame->eptr);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject || frame->fc == d)
{
register int d;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
}
for(;;)
{
- RMATCH(44, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(44, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr);
repeat it in the interests of efficiency. */
case OP_TYPEEXACT:
- frame->min = frame->max = GET2(frame->ecode, 1);
- frame->minimize = TRUE;
+ min = frame->max = GET2(frame->ecode, 1);
+ minimize = TRUE;
frame->ecode += 3;
goto REPEATTYPE;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
- frame->min = 0;
+ min = 0;
frame->max = GET2(frame->ecode, 1);
- frame->minimize = *frame->ecode == OP_TYPEMINUPTO;
+ minimize = *frame->ecode == OP_TYPEMINUPTO;
frame->ecode += 3;
goto REPEATTYPE;
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
c = *frame->ecode++ - OP_TYPESTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
is tidier. Also separate the UCP code, which can be the same for both UTF-8
and single-bytes. */
- if (frame->min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
- if (frame->min > 0)
+ if (min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
+ if (min > 0)
{
switch(frame->ctype)
{
case OP_ANY:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr))
RRETURN(MATCH_NOMATCH);
break;
case OP_NOT_DIGIT:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
break;
case OP_DIGIT:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
*frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_digit) == 0)
break;
case OP_NOT_WHITESPACE:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
(*frame->eptr < 128 && (md->ctypes[*frame->eptr] & ctype_space) != 0))
break;
case OP_WHITESPACE:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
*frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_space) == 0)
break;
case OP_NOT_WORDCHAR:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
(*frame->eptr < 128 && (md->ctypes[*frame->eptr] & ctype_word) != 0))
break;
case OP_WORDCHAR:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
*frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_word) == 0)
/* If min = max, continue at the same level without recursing */
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
/* If minimizing, we have to test the rest of the pattern before each
- subsequent match. Again, separate the UTF-8 case for speed, and also
- separate the UCP cases. */
+ subsequent match. */
- if (frame->minimize)
+ if (minimize)
{
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(48, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(48, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
}
/* If maximizing it is worth using inline code for speed, doing the type
- test once at the start (i.e. keep it out of the loop). Again, keep the
- UTF-8 and UCP stuff separate. */
+ test once at the start (i.e. keep it out of the loop). */
else
{
if (frame->max < INT_MAX)
{
{
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
frame->eptr++;
else
{
{
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
frame->eptr++;
break;
case OP_NOT_DIGIT:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_DIGIT:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_NOT_WHITESPACE:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_WHITESPACE:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_NOT_WORDCHAR:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_WORDCHAR:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
for(;;)
{
- RMATCH(52, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(52, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr);
}
/* Control never gets here */
+ default:
+ /* Opening capturing bracket. If there is space in the offset vector, save
+ the current subject position in the working slot at the top of the vector. We
+ mustn't change the current values of the data slot, because they may be set
+ from a previous iteration of this group, and be referred to by a reference
+ inside the group.
+
+ If the bracket fails to match, we need to restore this value and also the
+ values of the final offsets, in case they were set by a previous iteration of
+ the same bracket.
+
+ If there isn't enough space in the offset vector, treat this as if it were a
+ non-capturing bracket. Don't worry about setting the flag for the error case
+ here; that is handled in the code for KET. */
+
+ if (*frame->ecode > OP_BRA)
+ {
+ frame->number = *frame->ecode - OP_BRA;
+
+ /* For extended extraction brackets (large number), we have to fish out the
+ number from a dummy opcode at the start. */
+
+ if (frame->number > EXTRACT_BASIC_MAX)
+ frame->number = GET2(frame->ecode, 2+LINK_SIZE);
+ frame->offset = frame->number << 1;
+
+#ifdef DEBUG
+ printf("start bracket %d subject=", frame->number);
+ pchars(frame->eptr, 16, TRUE, md);
+ printf("\n");
+#endif
+
+ if (frame->offset < md->offset_max)
+ {
+ frame->save_offset1 = md->offset_vector[frame->offset];
+ frame->save_offset2 = md->offset_vector[frame->offset+1];
+ frame->save_offset3 = md->offset_vector[md->offset_end - frame->number];
+
+ DPRINTF(("saving %d %d %d\n", frame->save_offset1, frame->save_offset2, frame->save_offset3));
+ md->offset_vector[md->offset_end - frame->number] = frame->eptr - md->start_subject;
+
+ do
+ {
+ RMATCH(1, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ frame->ecode += GET(frame->ecode, 1);
+ }
+ while (*frame->ecode == OP_ALT);
+
+ DPRINTF(("bracket %d failed\n", frame->number));
+
+ md->offset_vector[frame->offset] = frame->save_offset1;
+ md->offset_vector[frame->offset+1] = frame->save_offset2;
+ md->offset_vector[md->offset_end - frame->number] = frame->save_offset3;
+
+ RRETURN(MATCH_NOMATCH);
+ }
+
+ /* Insufficient room for saving captured contents */
+
+ goto NON_CAPTURING_BRACKET;
+ }
+
/* There's been some horrible disaster. Since all codes > OP_BRA are
for capturing brackets, and there shouldn't be any gaps between 0 and
OP_BRA, arrival here can only mean there is something seriously wrong
in the code above or the OP_xxx definitions. */
- default:
DPRINTF(("Unknown opcode %d\n", *frame->ecode));
RRETURN(JS_REGEXP_ERROR_INTERNAL);
}
must also be updated to match. */
enum {
- OP_END, /* 0 End of pattern */
+ OP_END, /* End of pattern */
/* Values corresponding to backslashed metacharacters */
- OP_NOT_WORD_BOUNDARY, /* 3 \B */
- OP_WORD_BOUNDARY, /* 4 \b */
- OP_NOT_DIGIT, /* 5 \D */
- OP_DIGIT, /* 6 \d */
- OP_NOT_WHITESPACE, /* 7 \S */
- OP_WHITESPACE, /* 8 \s */
- OP_NOT_WORDCHAR, /* 9 \W */
- OP_WORDCHAR, /* 10 \w */
-
- OP_ANY, /* 11 Match any character */
-
- OP_CIRC, /* 19 Start of line - varies with multiline switch */
- OP_DOLL, /* 20 End of line - varies with multiline switch */
- OP_CHAR, /* 21 Match one character, casefully */
- OP_CHARNC, /* 22 Match one character, caselessly */
- OP_NOT, /* 23 Match anything but the following char */
-
- OP_STAR, /* 24 The maximizing and minimizing versions of */
- OP_MINSTAR, /* 25 all these opcodes must come in pairs, with */
- OP_PLUS, /* 26 the minimizing one second. */
- OP_MINPLUS, /* 27 This first set applies to single characters */
- OP_QUERY, /* 28 */
- OP_MINQUERY, /* 29 */
- OP_UPTO, /* 30 From 0 to n matches */
- OP_MINUPTO, /* 31 */
- OP_EXACT, /* 32 Exactly n matches */
-
- OP_NOTSTAR, /* 33 The maximizing and minimizing versions of */
- OP_NOTMINSTAR, /* 34 all these opcodes must come in pairs, with */
- OP_NOTPLUS, /* 35 the minimizing one second. */
- OP_NOTMINPLUS, /* 36 This set applies to "not" single characters */
- OP_NOTQUERY, /* 37 */
- OP_NOTMINQUERY, /* 38 */
- OP_NOTUPTO, /* 39 From 0 to n matches */
- OP_NOTMINUPTO, /* 40 */
- OP_NOTEXACT, /* 41 Exactly n matches */
-
- OP_TYPESTAR, /* 42 The maximizing and minimizing versions of */
- OP_TYPEMINSTAR, /* 43 all these opcodes must come in pairs, with */
- OP_TYPEPLUS, /* 44 the minimizing one second. These codes must */
- OP_TYPEMINPLUS, /* 45 be in exactly the same order as those above. */
- OP_TYPEQUERY, /* 46 This set applies to character types such as \d */
- OP_TYPEMINQUERY, /* 47 */
- OP_TYPEUPTO, /* 48 From 0 to n matches */
- OP_TYPEMINUPTO, /* 49 */
- OP_TYPEEXACT, /* 50 Exactly n matches */
-
- OP_CRSTAR, /* 51 The maximizing and minimizing versions of */
- OP_CRMINSTAR, /* 52 all these opcodes must come in pairs, with */
- OP_CRPLUS, /* 53 the minimizing one second. These codes must */
- OP_CRMINPLUS, /* 54 be in exactly the same order as those above. */
- OP_CRQUERY, /* 55 These are for character classes and back refs */
- OP_CRMINQUERY, /* 56 */
- OP_CRRANGE, /* 57 These are different to the three sets above. */
- OP_CRMINRANGE, /* 58 */
-
- OP_CLASS, /* 59 Match a character class, chars < 256 only */
- OP_NCLASS, /* 60 Same, but the bitmap was created from a negative
- class - the difference is relevant only when a UTF-8
- character > 255 is encountered. */
-
- OP_XCLASS, /* 61 Extended class for handling UTF-8 chars within the
- class. This does both positive and negative. */
-
- OP_REF, /* 62 Match a back reference */
-
- OP_ALT, /* 65 Start of alternation */
- OP_KET, /* 66 End of group that doesn't have an unbounded repeat */
- OP_KETRMAX, /* 67 These two must remain together and in this */
- OP_KETRMIN, /* 68 order. They are for groups the repeat for ever. */
+ OP_NOT_WORD_BOUNDARY, /* \B */
+ OP_WORD_BOUNDARY, /* \b */
+ OP_NOT_DIGIT, /* \D */
+ OP_DIGIT, /* \d */
+ OP_NOT_WHITESPACE, /* \S */
+ OP_WHITESPACE, /* \s */
+ OP_NOT_WORDCHAR, /* \W */
+ OP_WORDCHAR, /* \w */
+
+ OP_ANY, /* . -- Match any character */
+
+ OP_CIRC, /* ^ */
+ OP_DOLL, /* $ */
+ OP_CHAR, /* Match one character, casefully */
+ OP_CHARNC, /* Match one character, caselessly */
+ OP_ASCII_CHAR, /* Match one ASCII (0-127) character. */
+ OP_ASCII_LETTER_NC, /* Match one ASCII letter, caselessly. */
+ OP_NOT, /* Match anything but the following char */
+
+ OP_STAR, /* The maximizing and minimizing versions of */
+ OP_MINSTAR, /* all these opcodes must come in pairs, with */
+ OP_PLUS, /* the minimizing one second. */
+ OP_MINPLUS, /* This first set applies to single characters */
+ OP_QUERY,
+ OP_MINQUERY,
+ OP_UPTO, /* From 0 to n matches */
+ OP_MINUPTO,
+ OP_EXACT, /* Exactly n matches */
+
+ OP_NOTSTAR, /* This set applies to "not" single characters */
+ OP_NOTMINSTAR,
+ OP_NOTPLUS,
+ OP_NOTMINPLUS,
+ OP_NOTQUERY,
+ OP_NOTMINQUERY,
+ OP_NOTUPTO,
+ OP_NOTMINUPTO,
+ OP_NOTEXACT,
+
+ OP_TYPESTAR, /* This set applies to character types such as \d */
+ OP_TYPEMINSTAR,
+ OP_TYPEPLUS,
+ OP_TYPEMINPLUS,
+ OP_TYPEQUERY,
+ OP_TYPEMINQUERY,
+ OP_TYPEUPTO,
+ OP_TYPEMINUPTO,
+ OP_TYPEEXACT,
+
+ OP_CRSTAR, /* These are for character classes and back refs */
+ OP_CRMINSTAR,
+ OP_CRPLUS,
+ OP_CRMINPLUS,
+ OP_CRQUERY,
+ OP_CRMINQUERY,
+ OP_CRRANGE, /* These are different to the three sets above. */
+ OP_CRMINRANGE,
+
+ OP_CLASS, /* Match a character class, chars < 256 only */
+ OP_NCLASS, /* Same, but the bitmap was created from a negative
+ class - the difference is relevant when a UTF-8
+ character > 255 is encountered. */
+
+ OP_XCLASS, /* Extended class for handling UTF-8 chars within the
+ class. This does both positive and negative. */
+
+ OP_REF, /* Match a back reference */
+
+ OP_ALT, /* Start of alternation */
+ OP_KET, /* End of group that doesn't have an unbounded repeat */
+ OP_KETRMAX, /* These two must remain together and in this */
+ OP_KETRMIN, /* order. They are for groups that repeat forever. */
/* The assertions must come before ONCE and COND */
- OP_ASSERT, /* 69 Positive lookahead */
- OP_ASSERT_NOT, /* 70 Negative lookahead */
+ OP_ASSERT, /* Positive lookahead */
+ OP_ASSERT_NOT, /* Negative lookahead */
/* ONCE and COND must come after the assertions, with ONCE first, as there's
a test for >= ONCE for a subpattern that isn't an assertion. */
- OP_ONCE, /* 74 Once matched, don't back up into the subpattern */
+ OP_ONCE, /* Once matched, don't back up into the subpattern */
- OP_BRAZERO, /* 77 These two must remain together and in this */
- OP_BRAMINZERO, /* 78 order. */
+ OP_BRAZERO, /* These two must remain together and in this */
+ OP_BRAMINZERO, /* order. */
- OP_BRANUMBER, /* 79 Used for extracting brackets whose number is greater
- than can fit into an opcode. */
+ OP_BRANUMBER, /* Used for extracting brackets whose number is greater
+ than can fit into an opcode. */
- OP_BRA /* 80 This and greater values are used for brackets that
- extract substrings up to EXTRACT_BASIC_MAX. After
- that, use is made of OP_BRANUMBER. */
+ OP_BRA /* This and greater values are used for brackets that
+ extract substrings up to EXTRACT_BASIC_MAX. After
+ that, use is made of OP_BRANUMBER. */
};
/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
1, 1, 1, 1, 1, 1, 1, 1, /* \B, \b, \D, \d, \S, \s, \W, \w */ \
1, /* Any */ \
1, 1, /* ^, $ */ \
- 2, /* Char - the minimum length */ \
- 2, /* Charnc - the minimum length */ \
+ 2, 2, /* Char, Charnc - minimum lengths */ \
+ 2, 2, /* ASCII char, ASCII letter (caseless) */ \
2, /* not */ \
/* Positive single-char repeats ** These are */ \
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \