#endif
/* RMATCH(num, ra, rb, rc): begin a nested match using an explicit frame
   instead of a C function call.

   This text is a patch excerpt: '-' lines are the old five-argument form
   (num, rx, rb, rf, rg), '+' lines are the new four-argument form, and some
   unchanged lines of the macro are not shown.

   Steps visible here:
     - newframe is frame + 1 when frame lies inside the stackframes array and
       the following slot is still below stackframesend; a frame is also
       obtained from pcre_malloc.
       NOTE(review): no "else" is visible before the pcre_malloc line in this
       excerpt - confirm against the full file that the allocation is only the
       fallback branch, otherwise the in-array slot is never used.
     - frame->where is set to RMATCH_WHERE(num).
     - eptr and offset_top are copied from the current frame; the new frame's
       ecode is set to ra and its eptrb to rb (old form: rb and rf).
     - is_group_start is set to rc (old form: rg).
     - rdepth is incremented, newframe->prevframe is linked to the current
       frame, newframe becomes the current frame, and control jumps to
       HEAP_RECURSE.
     - The label RRETURN_<num> follows the jump; rdepth is decremented there.
       The old form also copied "result" into the caller-supplied rx at this
       point; the new form has no such assignment. */
-#define RMATCH(num,rx,rb,rf,rg)\
+#define RMATCH(num, ra, rb, rc)\
{\
if (frame >= stackframes && frame + 1 < stackframesend)\
newframe = frame + 1;\
newframe = (pcre_malloc)(sizeof(matchframe));\
frame->where = RMATCH_WHERE(num);\
newframe->eptr = frame->eptr;\
- newframe->ecode = rb;\
+ newframe->ecode = (ra);\
newframe->offset_top = frame->offset_top;\
- newframe->eptrb = rf;\
- is_group_start = rg;\
+ newframe->eptrb = (rb);\
+ is_group_start = (rc);\
++rdepth;\
newframe->prevframe = frame;\
frame = newframe;\
DPRINTF(("restarting from line %d\n", __LINE__));\
goto HEAP_RECURSE;\
RRETURN_##num:\
- DPRINTF(("did a goto back to line %d\n", __LINE__));\
- rx = result;\
--rdepth;\
+ DPRINTF(("did a goto back to line %d\n", __LINE__));\
}
/* RRETURN(ra): finish the current frame with result ra.

   Steps visible in this patch excerpt:
     - (pcre_free)(newframe) is called.
     - If frame is not NULL, ra is stored in rrc (the '-' line shows the old
       code stored it in "result") and control jumps to RRETURN_LABEL.
     - Otherwise ra is returned from the enclosing function.

   NOTE(review): unchanged lines of this macro are missing from the excerpt
   (the final "}" has no visible opening brace, and nothing shown here moves
   frame back to its predecessor), so any condition on the pcre_free call and
   the frame bookkeeping must be checked against the full file. */
#define RRETURN(ra)\
(pcre_free)(newframe);\
if (frame != NULL)\
{\
- result = ra;\
+ rrc = (ra);\
goto RRETURN_LABEL;\
}\
return ra;\
}
-
/* Structure for remembering the local variables in a private frame */
typedef struct matchframe {
const uschar *prev;
const pcre_uchar *saved_eptr;
- BOOL minimize;
-
int repeat_othercase;
int ctype;
int fi;
int length;
int max;
- int min;
int number;
int offset;
- int op;
int save_offset1, save_offset2, save_offset3;
eptrblock newptrb;
returns a negative (error) response, the outer incarnation must also return the
same response.
-Performance note: It might be tempting to extract commonly used fields from the
-md structure (e.g. utf8, end_subject) into individual variables to improve
-performance. Tests using gcc on a SPARC disproved this; in the first case, it
-made performance worse.
-
Arguments:
eptr pointer in subject
ecode position in code
BOOL cur_is_word;
BOOL prev_is_word;
BOOL is_group_start = TRUE;
+int min;
+BOOL minimize = FALSE; /* Initialization not really needed, but some compilers think so. */
/* When recursion is not being used, all "local" variables that have to be
preserved over calls to RMATCH() are part of a "frame" which is obtained from
matchframe *frame = stackframes;
matchframe *newframe;
-int result;
frame->prevframe = NULL; /* Marks the top level */
/* Copy in the original argument variables */
for (;;)
{
- frame->op = *frame->ecode;
- frame->minimize = FALSE;
-
- /* Opening capturing bracket. If there is space in the offset vector, save
- the current subject position in the working slot at the top of the vector. We
- mustn't change the current values of the data slot, because they may be set
- from a previous iteration of this group, and be referred to by a reference
- inside the group.
-
- If the bracket fails to match, we need to restore this value and also the
- values of the final offsets, in case they were set by a previous iteration of
- the same bracket.
-
- If there isn't enough space in the offset vector, treat this as if it were a
- non-capturing bracket. Don't worry about setting the flag for the error case
- here; that is handled in the code for KET. */
-
- if (frame->op > OP_BRA)
- {
- frame->number = frame->op - OP_BRA;
-
- /* For extended extraction brackets (large number), we have to fish out the
- number from a dummy opcode at the start. */
-
- if (frame->number > EXTRACT_BASIC_MAX)
- frame->number = GET2(frame->ecode, 2+LINK_SIZE);
- frame->offset = frame->number << 1;
-
-#ifdef DEBUG
- printf("start bracket %d subject=", frame->number);
- pchars(frame->eptr, 16, TRUE, md);
- printf("\n");
-#endif
-
- if (frame->offset < md->offset_max)
- {
- frame->save_offset1 = md->offset_vector[frame->offset];
- frame->save_offset2 = md->offset_vector[frame->offset+1];
- frame->save_offset3 = md->offset_vector[md->offset_end - frame->number];
-
- DPRINTF(("saving %d %d %d\n", frame->save_offset1, frame->save_offset2, frame->save_offset3));
- md->offset_vector[md->offset_end - frame->number] = frame->eptr - md->start_subject;
-
- do
- {
- RMATCH(1, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- frame->ecode += GET(frame->ecode, 1);
- }
- while (*frame->ecode == OP_ALT);
-
- DPRINTF(("bracket %d failed\n", frame->number));
-
- md->offset_vector[frame->offset] = frame->save_offset1;
- md->offset_vector[frame->offset+1] = frame->save_offset2;
- md->offset_vector[md->offset_end - frame->number] = frame->save_offset3;
-
- RRETURN(MATCH_NOMATCH);
- }
-
- /* Insufficient room for saving captured contents */
-
- else frame->op = OP_BRA;
- }
-
- /* Other types of node can be handled by a switch */
-
- switch(frame->op)
+ switch (*frame->ecode)
{
case OP_BRA: /* Non-capturing bracket: optimized */
+ NON_CAPTURING_BRACKET:
DPRINTF(("start bracket 0\n"));
do
{
- RMATCH(2, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
+ RMATCH(2, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode += GET(frame->ecode, 1);
}
case OP_ASSERT:
do
{
- RMATCH(6, rrc, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
+ RMATCH(6, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
if (rrc == MATCH_MATCH) break;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode += GET(frame->ecode, 1);
case OP_ASSERT_NOT:
do
{
- RMATCH(7, rrc, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
+ RMATCH(7, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode += GET(frame->ecode,1);
do
{
- RMATCH(9, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
+ RMATCH(9, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
if (rrc == MATCH_MATCH) break;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode += GET(frame->ecode,1);
if (*frame->ecode == OP_KETRMIN)
{
- RMATCH(10, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, 0);
+ RMATCH(10, frame->ecode + 1 + LINK_SIZE, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(11, rrc, frame->prev, frame->eptrb, match_isgroup);
+ RMATCH(11, frame->prev, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
else /* OP_KETRMAX */
{
- RMATCH(12, rrc, frame->prev, frame->eptrb, match_isgroup);
+ RMATCH(12, frame->prev, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(13, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
+ RMATCH(13, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
}
case OP_BRAZERO:
{
frame->next = frame->ecode+1;
- RMATCH(14, rrc, frame->next, frame->eptrb, match_isgroup);
+ RMATCH(14, frame->next, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT);
frame->ecode = frame->next + 1+LINK_SIZE;
{
frame->next = frame->ecode+1;
do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT);
- RMATCH(15, rrc, frame->next + 1+LINK_SIZE, frame->eptrb, match_isgroup);
+ RMATCH(15, frame->next + 1+LINK_SIZE, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->ecode++;
}
if (*frame->ecode == OP_KETRMIN)
{
- RMATCH(16, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
+ RMATCH(16, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(17, rrc, frame->prev, frame->eptrb, match_isgroup);
+ RMATCH(17, frame->prev, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
else /* OP_KETRMAX */
{
- RMATCH(18, rrc, frame->prev, frame->eptrb, match_isgroup);
+ RMATCH(18, frame->prev, frame->eptrb, match_isgroup);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(19, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
+ RMATCH(19, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
}
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *frame->ecode++ - OP_CRSTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
- frame->minimize = (*frame->ecode == OP_CRMINRANGE);
- frame->min = GET2(frame->ecode, 1);
+ minimize = (*frame->ecode == OP_CRMINRANGE);
+ min = GET2(frame->ecode, 1);
frame->max = GET2(frame->ecode, 3);
if (frame->max == 0) frame->max = INT_MAX;
frame->ecode += 5;
/* First, ensure the minimum number of matches are present. */
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN(MATCH_NOMATCH);
frame->eptr += frame->length;
/* If min = max, continue at the same level without recursion.
They are not both allowed to be zero. */
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
/* If minimizing, keep trying and advancing the pointer */
- if (frame->minimize)
+ if (minimize)
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(20, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(20, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || !match_ref(frame->offset, frame->eptr, frame->length, md))
RRETURN(MATCH_NOMATCH);
else
{
frame->pp = frame->eptr;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
if (!match_ref(frame->offset, frame->eptr, frame->length, md)) break;
frame->eptr += frame->length;
}
while (frame->eptr >= frame->pp)
{
- RMATCH(21, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(21, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->eptr -= frame->length;
}
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *frame->ecode++ - OP_CRSTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
- frame->minimize = (*frame->ecode == OP_CRMINRANGE);
- frame->min = GET2(frame->ecode, 1);
+ minimize = (*frame->ecode == OP_CRMINRANGE);
+ min = GET2(frame->ecode, 1);
frame->max = GET2(frame->ecode, 3);
if (frame->max == 0) frame->max = INT_MAX;
frame->ecode += 5;
break;
default: /* No repeat follows */
- frame->min = frame->max = 1;
+ min = frame->max = 1;
break;
}
/* First, ensure the minimum number of matches are present. */
{
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
if (c > 255)
{
- if (frame->op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ if (frame->data[-1] == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
{
/* If max == min we can continue with the main loop without the
need to recurse. */
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
/* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */
- if (frame->minimize)
+ if (minimize)
{
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(22, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(22, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
if (c > 255)
{
- if (frame->op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ if (frame->data[-1] == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
{
frame->pp = frame->eptr;
{
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
GETCHARLEN(c, frame->eptr, len);
if (c > 255)
{
- if (frame->op == OP_CLASS) break;
+ if (frame->data[-1] == OP_CLASS) break;
}
else
{
}
for (;;)
{
- RMATCH(24, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(24, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr);
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *frame->ecode++ - OP_CRSTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
- frame->minimize = (*frame->ecode == OP_CRMINRANGE);
- frame->min = GET2(frame->ecode, 1);
+ minimize = (*frame->ecode == OP_CRMINRANGE);
+ min = GET2(frame->ecode, 1);
frame->max = GET2(frame->ecode, 3);
if (frame->max == 0) frame->max = INT_MAX;
frame->ecode += 5;
break;
default: /* No repeat follows */
- frame->min = frame->max = 1;
+ min = frame->max = 1;
break;
}
/* First, ensure the minimum number of matches are present. */
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
/* If max == min we can continue with the main loop without the
need to recurse. */
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
/* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */
- if (frame->minimize)
+ if (minimize)
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(26, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(26, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
else
{
frame->pp = frame->eptr;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
}
for(;;)
{
- RMATCH(27, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(27, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr)
}
break;
+ /* Match a single ASCII character. */
+
+ case OP_ASCII_CHAR:
+ if (md->end_subject == frame->eptr)
+ RRETURN(MATCH_NOMATCH);
+ if (*frame->eptr != frame->ecode[1])
+ RRETURN(MATCH_NOMATCH);
+ ++frame->eptr;
+ frame->ecode += 2;
+ break;
+
+ /* Match an ASCII letter in either case: the subject character is OR-ed with 0x20 before it is compared with the operand. */
+
+ case OP_ASCII_LETTER_NC:
+ if (md->end_subject == frame->eptr)
+ RRETURN(MATCH_NOMATCH);
+ if ((*frame->eptr | 0x20) != frame->ecode[1])
+ RRETURN(MATCH_NOMATCH);
+ ++frame->eptr;
+ frame->ecode += 2;
+ break;
+
/* Match a single character repeatedly; different opcodes share code. */
case OP_EXACT:
- frame->min = frame->max = GET2(frame->ecode, 1);
+ min = frame->max = GET2(frame->ecode, 1);
+ minimize = FALSE;
frame->ecode += 3;
goto REPEATCHAR;
case OP_UPTO:
case OP_MINUPTO:
- frame->min = 0;
+ min = 0;
frame->max = GET2(frame->ecode, 1);
- frame->minimize = *frame->ecode == OP_MINUPTO;
+ minimize = *frame->ecode == OP_MINUPTO;
frame->ecode += 3;
goto REPEATCHAR;
case OP_QUERY:
case OP_MINQUERY:
c = *frame->ecode++ - OP_STAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
frame->length = 1;
GETUTF8CHARLEN(frame->fc, frame->ecode, frame->length);
{
- if (frame->min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
+ if (min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
frame->ecode += frame->length;
if (frame->fc <= 0xFFFF)
{
int othercase = md->caseless ? _pcre_ucp_othercase(frame->fc) : -1;
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (*frame->eptr != frame->fc && *frame->eptr != othercase) RRETURN(MATCH_NOMATCH);
++frame->eptr;
}
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
- if (frame->minimize)
+ if (minimize)
{
frame->repeat_othercase = othercase;
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(28, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(28, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
if (*frame->eptr != frame->fc && *frame->eptr != frame->repeat_othercase) RRETURN(MATCH_NOMATCH);
else
{
frame->pp = frame->eptr;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
if (frame->eptr >= md->end_subject) break;
if (*frame->eptr != frame->fc && *frame->eptr != othercase) break;
}
while (frame->eptr >= frame->pp)
{
- RMATCH(29, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(29, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
--frame->eptr;
}
{
/* No case on surrogate pairs, so no need to bother with "othercase". */
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
int nc;
GETCHAR(nc, frame->eptr);
frame->eptr += 2;
}
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
- if (frame->minimize)
+ if (minimize)
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
int nc;
- RMATCH(30, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(30, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHAR(nc, frame->eptr);
else
{
frame->pp = frame->eptr;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int nc;
if (frame->eptr > md->end_subject - 2) break;
}
while (frame->eptr >= frame->pp)
{
- RMATCH(31, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(31, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
frame->eptr -= 2;
}
about... */
case OP_NOTEXACT:
- frame->min = frame->max = GET2(frame->ecode, 1);
+ min = frame->max = GET2(frame->ecode, 1);
+ minimize = FALSE;
frame->ecode += 3;
goto REPEATNOTCHAR;
case OP_NOTUPTO:
case OP_NOTMINUPTO:
- frame->min = 0;
+ min = 0;
frame->max = GET2(frame->ecode, 1);
- frame->minimize = *frame->ecode == OP_NOTMINUPTO;
+ minimize = *frame->ecode == OP_NOTMINUPTO;
frame->ecode += 3;
goto REPEATNOTCHAR;
case OP_NOTQUERY:
case OP_NOTMINQUERY:
c = *frame->ecode++ - OP_NOTSTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
subject. */
REPEATNOTCHAR:
- if (frame->min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
+ if (min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
frame->fc = *frame->ecode++;
/* The code is duplicated for the caseless and caseful cases, for speed,
maximum. Alternatively, if maximizing, find the maximum number of
characters and work backwards. */
- DPRINTF(("negative matching %c{%d,%d}\n", frame->fc, frame->min, frame->max));
+ DPRINTF(("negative matching %c{%d,%d}\n", frame->fc, min, frame->max));
if (md->caseless)
{
{
register int d;
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
GETCHARINC(d, frame->eptr);
if (d < 128) d = md->lcc[d];
}
}
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
- if (frame->minimize)
+ if (minimize)
{
{
register int d;
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(38, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(38, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
GETCHARINC(d, frame->eptr);
if (d < 128) d = md->lcc[d];
{
register int d;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
}
for(;;)
{
- RMATCH(40, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(40, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr);
{
{
register int d;
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
GETCHARINC(d, frame->eptr);
if (frame->fc == d) RRETURN(MATCH_NOMATCH);
}
}
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
- if (frame->minimize)
+ if (minimize)
{
{
register int d;
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(42, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(42, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
GETCHARINC(d, frame->eptr);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject || frame->fc == d)
{
register int d;
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
}
for(;;)
{
- RMATCH(44, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(44, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr);
repeat it in the interests of efficiency. */
case OP_TYPEEXACT:
- frame->min = frame->max = GET2(frame->ecode, 1);
- frame->minimize = TRUE;
+ min = frame->max = GET2(frame->ecode, 1);
+ minimize = TRUE;
frame->ecode += 3;
goto REPEATTYPE;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
- frame->min = 0;
+ min = 0;
frame->max = GET2(frame->ecode, 1);
- frame->minimize = *frame->ecode == OP_TYPEMINUPTO;
+ minimize = *frame->ecode == OP_TYPEMINUPTO;
frame->ecode += 3;
goto REPEATTYPE;
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
c = *frame->ecode++ - OP_TYPESTAR;
- frame->minimize = (c & 1) != 0;
- frame->min = rep_min[c]; /* Pick up values from tables; */
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
frame->max = rep_max[c]; /* zero for max => infinity */
if (frame->max == 0) frame->max = INT_MAX;
is tidier. Also separate the UCP code, which can be the same for both UTF-8
and single-bytes. */
- if (frame->min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
- if (frame->min > 0)
+ if (min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
+ if (min > 0)
{
switch(frame->ctype)
{
case OP_ANY:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr))
RRETURN(MATCH_NOMATCH);
break;
case OP_NOT_DIGIT:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, frame->eptr);
break;
case OP_DIGIT:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
*frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_digit) == 0)
break;
case OP_NOT_WHITESPACE:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
(*frame->eptr < 128 && (md->ctypes[*frame->eptr] & ctype_space) != 0))
break;
case OP_WHITESPACE:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
*frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_space) == 0)
break;
case OP_NOT_WORDCHAR:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
(*frame->eptr < 128 && (md->ctypes[*frame->eptr] & ctype_word) != 0))
break;
case OP_WORDCHAR:
- for (i = 1; i <= frame->min; i++)
+ for (i = 1; i <= min; i++)
{
if (frame->eptr >= md->end_subject ||
*frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_word) == 0)
/* If min = max, continue at the same level without recursing */
- if (frame->min == frame->max) continue;
+ if (min == frame->max) continue;
/* If minimizing, we have to test the rest of the pattern before each
- subsequent match. Again, separate the UTF-8 case for speed, and also
- separate the UCP cases. */
+ subsequent match. */
- if (frame->minimize)
+ if (minimize)
{
{
- for (frame->fi = frame->min;; frame->fi++)
+ for (frame->fi = min;; frame->fi++)
{
- RMATCH(48, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(48, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
}
/* If maximizing it is worth using inline code for speed, doing the type
- test once at the start (i.e. keep it out of the loop). Again, keep the
- UTF-8 and UCP stuff separate. */
+ test once at the start (i.e. keep it out of the loop). */
else
{
if (frame->max < INT_MAX)
{
{
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
frame->eptr++;
else
{
{
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
frame->eptr++;
break;
case OP_NOT_DIGIT:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_DIGIT:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_NOT_WHITESPACE:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_WHITESPACE:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_NOT_WORDCHAR:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
break;
case OP_WORDCHAR:
- for (i = frame->min; i < frame->max; i++)
+ for (i = min; i < frame->max; i++)
{
int len = 1;
if (frame->eptr >= md->end_subject) break;
for(;;)
{
- RMATCH(52, rrc, frame->ecode, frame->eptrb, 0);
+ RMATCH(52, frame->ecode, frame->eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
BACKCHAR(frame->eptr);
}
/* Control never gets here */
+ default:
+ /* Opening capturing bracket. If there is space in the offset vector, save
+ the current subject position in the working slot at the top of the vector. We
+ mustn't change the current values of the data slot, because they may be set
+ from a previous iteration of this group, and be referred to by a reference
+ inside the group.
+
+ If the bracket fails to match, we need to restore this value and also the
+ values of the final offsets, in case they were set by a previous iteration of
+ the same bracket.
+
+ If there isn't enough space in the offset vector, treat this as if it were a
+ non-capturing bracket. Don't worry about setting the flag for the error case
+ here; that is handled in the code for KET. */
+
+ if (*frame->ecode > OP_BRA)
+ {
+ frame->number = *frame->ecode - OP_BRA;
+
+ /* For extended extraction brackets (large number), we have to fish out the
+ number from a dummy opcode at the start. */
+
+ if (frame->number > EXTRACT_BASIC_MAX)
+ frame->number = GET2(frame->ecode, 2+LINK_SIZE);
+ frame->offset = frame->number << 1;
+
+#ifdef DEBUG
+ printf("start bracket %d subject=", frame->number);
+ pchars(frame->eptr, 16, TRUE, md);
+ printf("\n");
+#endif
+
+ if (frame->offset < md->offset_max)
+ {
+ frame->save_offset1 = md->offset_vector[frame->offset];
+ frame->save_offset2 = md->offset_vector[frame->offset+1];
+ frame->save_offset3 = md->offset_vector[md->offset_end - frame->number];
+
+ DPRINTF(("saving %d %d %d\n", frame->save_offset1, frame->save_offset2, frame->save_offset3));
+ md->offset_vector[md->offset_end - frame->number] = frame->eptr - md->start_subject;
+
+ do
+ {
+ RMATCH(1, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ frame->ecode += GET(frame->ecode, 1);
+ }
+ while (*frame->ecode == OP_ALT);
+
+ DPRINTF(("bracket %d failed\n", frame->number));
+
+ md->offset_vector[frame->offset] = frame->save_offset1;
+ md->offset_vector[frame->offset+1] = frame->save_offset2;
+ md->offset_vector[md->offset_end - frame->number] = frame->save_offset3;
+
+ RRETURN(MATCH_NOMATCH);
+ }
+
+ /* Insufficient room for saving captured contents */
+
+ goto NON_CAPTURING_BRACKET;
+ }
+
/* There's been some horrible disaster. Since all codes > OP_BRA are
for capturing brackets, and there shouldn't be any gaps between 0 and
OP_BRA, arrival here can only mean there is something seriously wrong
in the code above or the OP_xxx definitions. */
- default:
DPRINTF(("Unknown opcode %d\n", *frame->ecode));
RRETURN(JS_REGEXP_ERROR_INTERNAL);
}