2007-11-24 Eric Seidel <eric@webkit.org>
author eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 29 Nov 2007 11:23:11 +0000 (11:23 +0000)
committer eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 29 Nov 2007 11:23:11 +0000 (11:23 +0000)
        Reviewed by Sam.

        Give GET, PUT better names, and add (poor) moveOpcodePtrPastAnyAlternateBranches

        * pcre/pcre_compile.cpp:
        (firstSignificantOpCodeSkippingAssertions):
        (find_fixedlength):
        (complete_callout):
        (compile_branch):
        (compile_regex):
        (is_anchored):
        (canApplyFirstCharOptimization):
        (find_firstassertedchar):
        * pcre/pcre_exec.cpp:
        (match):
        * pcre/pcre_internal.h:
        (putOpcodeValueAtOffset):
        (getOpcodeValueAtOffset):
        (putOpcodeValueAtOffsetAndAdvance):
        (put2ByteOpcodeValueAtOffset):
        (get2ByteOpcodeValueAtOffset):
        (moveOpcodePtrPastAnyAlternateBranches):
        * pcre/pcre_ucp_searchfuncs.cpp:
        (_pcre_ucp_othercase):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@28161 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/pcre/pcre_compile.cpp
JavaScriptCore/pcre/pcre_exec.cpp
JavaScriptCore/pcre/pcre_internal.h
JavaScriptCore/pcre/pcre_ucp_searchfuncs.cpp

index 8a255f995b5f49e36b6b04b8d238c5bc7061c80e..ad3463d6a8188351001f9a12b6fd6bd84765a0fc 100644 (file)
         (MatchStack::pushNewFrame):
         (match):
 
+2007-11-24  Eric Seidel  <eric@webkit.org>
+
+        Reviewed by Sam.
+
+        Give GET, PUT better names, and add (poor) moveOpcodePtrPastAnyAlternateBranches
+
+        * pcre/pcre_compile.cpp:
+        (firstSignificantOpCodeSkippingAssertions):
+        (find_fixedlength):
+        (complete_callout):
+        (compile_branch):
+        (compile_regex):
+        (is_anchored):
+        (canApplyFirstCharOptimization):
+        (find_firstassertedchar):
+        * pcre/pcre_exec.cpp:
+        (match):
+        * pcre/pcre_internal.h:
+        (putOpcodeValueAtOffset):
+        (getOpcodeValueAtOffset):
+        (putOpcodeValueAtOffsetAndAdvance):
+        (put2ByteOpcodeValueAtOffset):
+        (get2ByteOpcodeValueAtOffset):
+        (moveOpcodePtrPastAnyAlternateBranches):
+        * pcre/pcre_ucp_searchfuncs.cpp:
+        (_pcre_ucp_othercase):
+
 2007-11-24  Eric Seidel  <eric@webkit.org>
 
         Reviewed by Sam.
index 062cbb526474e0fe0bb645be96595346b55421d1..88339d7178c0401f329e021ca588f6d80824fd88 100644 (file)
@@ -421,7 +421,7 @@ static const uschar* firstSignificantOpCodeSkippingAssertions(const uschar* code
         switch (*code) {
         case OP_ASSERT_NOT:
             do {
-                code += GET(code, 1);
+                code += getOpcodeValueAtOffset(code, 1);
             } while (*code == OP_ALT);
             code += OP_lengths[*code];
             break;
@@ -477,7 +477,9 @@ static int find_fixedlength(uschar* code, int options)
                 if (d < 0)
                     return d;
                 branchlength += d;
-                do cc += GET(cc, 1); while (*cc == OP_ALT);
+                do {
+                    cc += getOpcodeValueAtOffset(cc, 1);
+                } while (*cc == OP_ALT);
                 cc += 1 + LINK_SIZE;
                 break;
                 
@@ -504,7 +506,9 @@ static int find_fixedlength(uschar* code, int options)
                 
             case OP_ASSERT:
             case OP_ASSERT_NOT:
-                do cc += GET(cc, 1); while (*cc == OP_ALT);
+                do {
+                    cc += getOpcodeValueAtOffset(cc, 1);
+                } while (*cc == OP_ALT);
                 /* Fall through */
                 
                 /* Skip over things that don't match chars */
@@ -538,14 +542,14 @@ static int find_fixedlength(uschar* code, int options)
                  need to skip over a multibyte character in UTF8 mode.  */
                 
             case OP_EXACT:
-                branchlength += GET2(cc,1);
+                branchlength += get2ByteOpcodeValueAtOffset(cc,1);
                 cc += 4;
                 while((*cc & 0x80) == 0x80)
                     cc++;
                 break;
                 
             case OP_TYPEEXACT:
-                branchlength += GET2(cc,1);
+                branchlength += get2ByteOpcodeValueAtOffset(cc,1);
                 cc += 4;
                 break;
                 
@@ -565,7 +569,7 @@ static int find_fixedlength(uschar* code, int options)
                 /* Check a class for variable quantification */
                 
             case OP_XCLASS:
-                cc += GET(cc, 1) - 33;
+                cc += getOpcodeValueAtOffset(cc, 1) - 33;
                 /* Fall through */
                 
             case OP_CLASS:
@@ -581,9 +585,9 @@ static int find_fixedlength(uschar* code, int options)
                     
                 case OP_CRRANGE:
                 case OP_CRMINRANGE:
-                    if (GET2(cc, 1) != GET2(cc, 3))
+                    if (get2ByteOpcodeValueAtOffset(cc, 1) != get2ByteOpcodeValueAtOffset(cc, 3))
                         return -1;
-                    branchlength += GET2(cc, 1);
+                    branchlength += get2ByteOpcodeValueAtOffset(cc, 1);
                     cc += 5;
                     break;
                     
@@ -618,8 +622,8 @@ Arguments:
 
 static void complete_callout(uschar* previous_callout, const UChar* ptr, const CompileData& cd)
 {
-    int length = ptr - cd.start_pattern - GET(previous_callout, 2);
-    PUT(previous_callout, 2 + LINK_SIZE, length);
+    int length = ptr - cd.start_pattern - getOpcodeValueAtOffset(previous_callout, 2);
+    putOpcodeValueAtOffset(previous_callout, 2 + LINK_SIZE, length);
 }
 
 
@@ -1176,7 +1180,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                     
                     /* Now fill in the complete length of the item */
                     
-                    PUT(previous, 1, code - previous);
+                    putOpcodeValueAtOffset(previous, 1, code - previous);
                     break;   /* End of class handling */
                 }
                 
@@ -1247,12 +1251,11 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                  but if PCRE_UNGREEDY is set, it works the other way round. We change the
                  repeat type to the non-default. */
                 
-                if (ptr + 1 < patternEnd && ptr[1] == '?')
-                {
+                if (ptr + 1 < patternEnd && ptr[1] == '?') {
                     repeat_type = 1;
                     ptr++;
-                }
-                else repeat_type = 0;
+                } else
+                    repeat_type = 0;
                 
                 /* If previous was a character match, abolish the item and generate a
                  repeat item instead. If a char item has a minumum of more than one, ensure
@@ -1260,23 +1263,20 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                  the first thing in a branch because the x will have gone into firstbyte
                  instead.  */
                 
-                if (*previous == OP_CHAR || *previous == OP_CHARNC)
-                {
+                if (*previous == OP_CHAR || *previous == OP_CHARNC) {
                     /* Deal with UTF-8 characters that take up more than one byte. It's
                      easier to write this out separately than try to macrify it. Use c to
                      hold the length of the character in bytes, plus 0x80 to flag that it's a
                      length rather than a small character. */
                     
-                    if ((code[-1] & 0x80) != 0)
-                    {
+                    if (code[-1] & 0x80) {
                         uschar *lastchar = code - 1;
                         while((*lastchar & 0xc0) == 0x80) lastchar--;
                         c = code - lastchar;            /* Length of UTF-8 character */
                         memcpy(utf8_char, lastchar, c); /* Save the char */
                         c |= 0x80;                      /* Flag c as a length */
                     }
-                    else
-                    {
+                    else {
                         c = code[-1];
                         if (repeat_min > 1)
                             reqbyte = c | req_caseopt | cd.req_varyopt;
@@ -1285,8 +1285,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                     goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
                 }
                 
-                else if (*previous == OP_ASCII_CHAR || *previous == OP_ASCII_LETTER_NC)
-                {
+                else if (*previous == OP_ASCII_CHAR || *previous == OP_ASCII_LETTER_NC) {
                     c = previous[1];
                     if (repeat_min > 1) reqbyte = c | req_caseopt | cd.req_varyopt;
                     goto OUTPUT_SINGLE_REPEAT;
@@ -1297,8 +1296,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                  character repeats by setting opt_type to add a suitable offset into
                  repeat_type. OP_NOT is currently used only for single-byte chars. */
                 
-                else if (*previous == OP_NOT)
-                {
+                else if (*previous == OP_NOT) {
                     op_type = OP_NOTSTAR - OP_STAR;  /* Use "not" opcodes */
                     c = previous[1];
                     goto OUTPUT_SINGLE_REPEAT;
@@ -1308,17 +1306,15 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                  create a suitable repeat item. The code is shared with single-character
                  repeats by setting op_type to add a suitable offset into repeat_type. */
                 
-                else if (*previous <= OP_ANY)
-                {
-                    uschar *oldcode;
-                    int prop_type, prop_value;
+                else if (*previous <= OP_ANY) {
                     op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
                     c = *previous;
                     
                 OUTPUT_SINGLE_REPEAT:
-                    prop_type = prop_value = -1;
+                    int prop_type = -1;
+                    int prop_value = -1;
                     
-                    oldcode = code;
+                    uschar* oldcode = code;
                     code = previous;                  /* Usually overwrite previous item */
                     
                     /* If the maximum is zero then the minimum must also be zero; Perl allows
@@ -1341,7 +1337,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                             *code++ = OP_QUERY + repeat_type;
                         else {
                             *code++ = OP_UPTO + repeat_type;
-                            PUT2INC(code, 0, repeat_max);
+                            put2ByteOpcodeValueAtOffsetAndAdvance(code, 0, repeat_max);
                         }
                     }
                     
@@ -1358,17 +1354,16 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                             if (repeat_max == 1)
                                 goto END_REPEAT;
                             *code++ = OP_UPTO + repeat_type;
-                            PUT2INC(code, 0, repeat_max - 1);
+                            put2ByteOpcodeValueAtOffsetAndAdvance(code, 0, repeat_max - 1);
                         }
                     }
                     
                     /* The case {n,n} is just an EXACT, while the general case {n,m} is
                      handled as an EXACT followed by an UPTO. */
                     
-                    else
-                    {
+                    else {
                         *code++ = OP_EXACT + op_type;  /* NB EXACT doesn't have repeat_type */
-                        PUT2INC(code, 0, repeat_min);
+                        put2ByteOpcodeValueAtOffsetAndAdvance(code, 0, repeat_min);
                         
                         /* If the maximum is unlimited, insert an OP_STAR. Before doing so,
                          we have to insert the character for the previous code. For a repeated
@@ -1405,7 +1400,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                             }
                             repeat_max -= repeat_min;
                             *code++ = OP_UPTO + repeat_type;
-                            PUT2INC(code, 0, repeat_max);
+                            put2ByteOpcodeValueAtOffsetAndAdvance(code, 0, repeat_max);
                         }
                     }
                     
@@ -1447,9 +1442,10 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                         *code++ = OP_CRQUERY + repeat_type;
                     else {
                         *code++ = OP_CRRANGE + repeat_type;
-                        PUT2INC(code, 0, repeat_min);
-                        if (repeat_max == -1) repeat_max = 0;  /* 2-byte encoding for max */
-                        PUT2INC(code, 0, repeat_max);
+                        put2ByteOpcodeValueAtOffsetAndAdvance(code, 0, repeat_min);
+                        if (repeat_max == -1)
+                            repeat_max = 0;  /* 2-byte encoding for max */
+                        put2ByteOpcodeValueAtOffsetAndAdvance(code, 0, repeat_max);
                     }
                 }
                 
@@ -1469,7 +1465,9 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                     
                     if (repeat_max == -1) {
                         uschar* ket = previous;
-                        do ket += GET(ket, 1); while (*ket != OP_KET);
+                        do {
+                            ket += getOpcodeValueAtOffset(ket, 1);
+                        } while (*ket != OP_KET);
                         ketoffset = code - ket;
                     }
                     
@@ -1520,9 +1518,9 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                             /* We chain together the bracket offset fields that have to be
                              filled in later when the ends of the brackets are reached. */
                             
-                            int offset = (bralink == NULL)? 0 : previous - bralink;
+                            int offset = (!bralink) ? 0 : previous - bralink;
                             bralink = previous;
-                            PUTINC(previous, 0, offset);
+                            putOpcodeValueAtOffsetAndAdvance(previous, 0, offset);
                         }
                         
                         repeat_max--;
@@ -1563,7 +1561,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                                 *code++ = OP_BRA;
                                 int offset = (!bralink) ? 0 : code - bralink;
                                 bralink = code;
-                                PUTINC(code, 0, offset);
+                                putOpcodeValueAtOffsetAndAdvance(code, 0, offset);
                             }
                             
                             memcpy(code, previous, len);
@@ -1576,11 +1574,11 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                         while (bralink) {
                             int offset = code - bralink + 1;
                             uschar* bra = code - offset;
-                            int oldlinkoffset = GET(bra, 1);
-                            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
+                            int oldlinkoffset = getOpcodeValueAtOffset(bra, 1);
+                            bralink = oldlinkoffset ? 0 : bralink - oldlinkoffset;
                             *code++ = OP_KET;
-                            PUTINC(code, 0, offset);
-                            PUT(bra, 1, offset);
+                            putOpcodeValueAtOffsetAndAdvance(code, 0, offset);
+                            putOpcodeValueAtOffset(bra, 1, offset);
                         }
                     }
                     
@@ -1653,7 +1651,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                     if (++(*brackets) > EXTRACT_BASIC_MAX) {
                         bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;
                         code[1 + LINK_SIZE] = OP_BRANUMBER;
-                        PUT2(code, 2+LINK_SIZE, *brackets);
+                        put2ByteOpcodeValueAtOffset(code, 2+LINK_SIZE, *brackets);
                         skipbytes = 3;
                     }
                     else
@@ -1665,7 +1663,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                  to pass its address because some compilers complain otherwise. Pass in a
                  new setting for the ims options if they have changed. */
                 
-                previous = (bravalue >= OP_ONCE)? code : NULL;
+                previous = (bravalue >= OP_ONCE) ? code : 0;
                 *code = bravalue;
                 tempcode = code;
                 tempreqvary = cd.req_varyopt;     /* Save value before bracket */
@@ -1785,7 +1783,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                         int number = -c - ESC_REF;
                         previous = code;
                         *code++ = OP_REF;
-                        PUT2INC(code, 0, number);
+                        put2ByteOpcodeValueAtOffsetAndAdvance(code, 0, number);
                     }
                     
                     /* For the rest, we can obtain the OP value by negating the escape
@@ -1918,7 +1916,7 @@ compile_regex(int options, int* brackets, uschar** codeptr,
     
     /* Offset is set zero to mark that this bracket is still open */
     
-    PUT(code, 1, 0);
+    putOpcodeValueAtOffset(code, 1, 0);
     code += 1 + LINK_SIZE + skipbytes;
     
     /* Loop for each alternative branch */
@@ -1982,8 +1980,8 @@ compile_regex(int options, int* brackets, uschar** codeptr,
         if (ptr >= patternEnd || *ptr != '|') {
             int length = code - last_branch;
             do {
-                int prev_length = GET(last_branch, 1);
-                PUT(last_branch, 1, length);
+                int prev_length = getOpcodeValueAtOffset(last_branch, 1);
+                putOpcodeValueAtOffset(last_branch, 1, length);
                 length = prev_length;
                 last_branch -= length;
             } while (length > 0);
@@ -1991,7 +1989,7 @@ compile_regex(int options, int* brackets, uschar** codeptr,
             /* Fill in the ket */
             
             *code = OP_KET;
-            PUT(code, 1, code - start_bracket);
+            putOpcodeValueAtOffset(code, 1, code - start_bracket);
             code += 1 + LINK_SIZE;
             
             /* Set values to pass back */
@@ -2009,7 +2007,7 @@ compile_regex(int options, int* brackets, uschar** codeptr,
          zero offset until it is closed, making it possible to detect recursion. */
         
         *code = OP_ALT;
-        PUT(code, 1, code - last_branch);
+        putOpcodeValueAtOffset(code, 1, code - last_branch);
         last_branch = code;
         code += 1 + LINK_SIZE;
         ptr++;
@@ -2066,7 +2064,7 @@ static bool is_anchored(const uschar* code, int options, unsigned int bracket_ma
         if (op > OP_BRA) {
             op -= OP_BRA;
             if (op > EXTRACT_BASIC_MAX)
-                op = GET2(scode, 2 + LINK_SIZE);
+                op = get2ByteOpcodeValueAtOffset(scode, 2 + LINK_SIZE);
             int new_map = bracket_map | ((op < 32)? (1 << op) : 1);
             if (!is_anchored(scode, options, new_map, backref_map))
                 return false;
@@ -2081,7 +2079,7 @@ static bool is_anchored(const uschar* code, int options, unsigned int bracket_ma
         
         else if ((options & PCRE_MULTILINE) || op != OP_CIRC)
             return false;
-        code += GET(code, 1);
+        code += getOpcodeValueAtOffset(code, 1);
     } while (*code == OP_ALT);   /* Loop for each alternative */
     return true;
 }
@@ -2117,7 +2115,7 @@ static bool canApplyFirstCharOptimization(const uschar* code, unsigned int brack
         if (op > OP_BRA) {
             op -= OP_BRA;
             if (op > EXTRACT_BASIC_MAX)
-                op = GET2(scode, 2+LINK_SIZE);
+                op = get2ByteOpcodeValueAtOffset(scode, 2+LINK_SIZE);
             int new_map = bracket_map | ((op < 32)? (1 << op) : 1);
             if (!canApplyFirstCharOptimization(scode, new_map, backref_map))
                 return false;
@@ -2139,7 +2137,7 @@ static bool canApplyFirstCharOptimization(const uschar* code, unsigned int brack
         
         /* Move on to the next alternative */
         
-        code += GET(code, 1);
+        code += getOpcodeValueAtOffset(code, 1);
     } while (*code == OP_ALT);  /* Loop for each alternative */
     return true;
 }
@@ -2212,7 +2210,7 @@ static int find_firstassertedchar(const uschar* code, int options, bool inassert
             break;
         }
         
-        code += GET(code, 1);
+        code += getOpcodeValueAtOffset(code, 1);
     } while (*code == OP_ALT);
     return c;
 }
index dad38e73eaf616796af128b9b466e4582dfadd53..323d5e774008e73b2793a7b8214d6d4fcfb479e2 100644 (file)
@@ -124,7 +124,7 @@ struct MatchFrame {
 doing traditional NFA matching, so that they are thread-safe. */
 
 struct MatchData {
-  unsigned long int match_call_count;      /* As it says */
+  unsigned long int match_call_count;
   int*   offset_vector;         /* Offset vector */
   int    offset_end;            /* One past the end */
   int    offset_max;            /* The maximum usable for return data */
@@ -513,7 +513,7 @@ RECURSE:
                     RMATCH(2, stack.currentFrame->args.ecode + 1 + LINK_SIZE, stack.currentFrame->args.eptrb, match_isgroup);
                     if (is_match)
                         RRETURN;
-                    stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode, 1);
+                    stack.currentFrame->args.ecode += getOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 } while (*stack.currentFrame->args.ecode == OP_ALT);
                 DPRINTF(("bracket 0 failed\n"));
                 RRETURN;
@@ -543,7 +543,7 @@ RECURSE:
                     RMATCH(6, stack.currentFrame->args.ecode + 1 + LINK_SIZE, NULL, match_isgroup);
                     if (is_match)
                         break;
-                    stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode, 1);
+                    stack.currentFrame->args.ecode += getOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 } while (*stack.currentFrame->args.ecode == OP_ALT);
                 if (*stack.currentFrame->args.ecode == OP_KET)
                     RRETURN_NO_MATCH;
@@ -551,7 +551,7 @@ RECURSE:
                 /* Continue from after the assertion, updating the offsets high water
                  mark, since extracts may have been taken during the assertion. */
                 
-                do stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode,1); while (*stack.currentFrame->args.ecode == OP_ALT);
+                moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.ecode);
                 stack.currentFrame->args.ecode += 1 + LINK_SIZE;
                 stack.currentFrame->args.offset_top = md.end_offset_top;
                 NEXT_OPCODE;
@@ -563,7 +563,7 @@ RECURSE:
                     RMATCH(7, stack.currentFrame->args.ecode + 1 + LINK_SIZE, NULL, match_isgroup);
                     if (is_match)
                         RRETURN_NO_MATCH;
-                    stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode,1);
+                    stack.currentFrame->args.ecode += getOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 } while (*stack.currentFrame->args.ecode == OP_ALT);
                 
                 stack.currentFrame->args.ecode += 1 + LINK_SIZE;
@@ -584,7 +584,7 @@ RECURSE:
                     RMATCH(9, stack.currentFrame->args.ecode + 1 + LINK_SIZE, stack.currentFrame->args.eptrb, match_isgroup);
                     if (is_match)
                         break;
-                    stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode,1);
+                    stack.currentFrame->args.ecode += getOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 } while (*stack.currentFrame->args.ecode == OP_ALT);
                 
                 /* If hit the end of the group (which could be repeated), fail */
@@ -595,9 +595,7 @@ RECURSE:
                 /* Continue as from after the assertion, updating the offsets high water
                  mark, since extracts may have been taken. */
                 
-                do {
-                    stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode,1);
-                } while (*stack.currentFrame->args.ecode == OP_ALT);
+                moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.ecode);
                 
                 stack.currentFrame->args.offset_top = md.end_offset_top;
                 stack.currentFrame->args.eptr = md.end_match_ptr;
@@ -639,7 +637,7 @@ RECURSE:
                  bracketed group and go to there. */
                 
                 BEGIN_OPCODE(ALT):
-                do stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode,1); while (*stack.currentFrame->args.ecode == OP_ALT);
+                moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->args.ecode);
                 NEXT_OPCODE;
                 
                 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
@@ -654,9 +652,7 @@ RECURSE:
                     RMATCH(14, stack.currentFrame->locals.next, stack.currentFrame->args.eptrb, match_isgroup);
                     if (is_match)
                         RRETURN;
-                    do {
-                        stack.currentFrame->locals.next += GET(stack.currentFrame->locals.next, 1);
-                    } while (*stack.currentFrame->locals.next == OP_ALT);
+                    moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->locals.next);
                     stack.currentFrame->args.ecode = stack.currentFrame->locals.next + 1 + LINK_SIZE;
                 }
                 NEXT_OPCODE;
@@ -664,9 +660,7 @@ RECURSE:
                 BEGIN_OPCODE(BRAMINZERO):
                 {
                     stack.currentFrame->locals.next = stack.currentFrame->args.ecode + 1;
-                    do {
-                        stack.currentFrame->locals.next += GET(stack.currentFrame->locals.next, 1);
-                    } while (*stack.currentFrame->locals.next == OP_ALT);
+                    moveOpcodePtrPastAnyAlternateBranches(stack.currentFrame->locals.next);
                     RMATCH(15, stack.currentFrame->locals.next + 1 + LINK_SIZE, stack.currentFrame->args.eptrb, match_isgroup);
                     if (is_match)
                         RRETURN;
@@ -682,7 +676,7 @@ RECURSE:
                 BEGIN_OPCODE(KET):
                 BEGIN_OPCODE(KETRMIN):
                 BEGIN_OPCODE(KETRMAX):
-                stack.currentFrame->locals.prev = stack.currentFrame->args.ecode - GET(stack.currentFrame->args.ecode, 1);
+                stack.currentFrame->locals.prev = stack.currentFrame->args.ecode - getOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 stack.currentFrame->locals.saved_eptr = stack.currentFrame->args.eptrb->epb_saved_eptr;
                 
                 /* Back up the stack of bracket start pointers. */
@@ -706,7 +700,7 @@ RECURSE:
                  the number from a dummy opcode at the start. */
                 
                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
-                    stack.currentFrame->locals.number = GET2(stack.currentFrame->locals.prev, 2+LINK_SIZE);
+                    stack.currentFrame->locals.number = get2ByteOpcodeValueAtOffset(stack.currentFrame->locals.prev, 2+LINK_SIZE);
                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
                 
 #ifdef DEBUG
@@ -883,7 +877,7 @@ RECURSE:
                  loops). */
                 
                 BEGIN_OPCODE(REF):
-                stack.currentFrame->locals.offset = GET2(stack.currentFrame->args.ecode, 1) << 1;               /* Doubled ref number */
+                stack.currentFrame->locals.offset = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1) << 1;               /* Doubled ref number */
                 stack.currentFrame->args.ecode += 3;                                 /* Advance past item */
                 
                 /* If the reference is unset, set the length to be longer than the amount
@@ -916,8 +910,8 @@ RECURSE:
                 case OP_CRRANGE:
                 case OP_CRMINRANGE:
                     minimize = (*stack.currentFrame->args.ecode == OP_CRMINRANGE);
-                    min = GET2(stack.currentFrame->args.ecode, 1);
-                    stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 3);
+                    min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
+                    stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 3);
                     if (stack.currentFrame->locals.max == 0)
                         stack.currentFrame->locals.max = INT_MAX;
                     stack.currentFrame->args.ecode += 5;
@@ -1017,8 +1011,8 @@ RECURSE:
                 case OP_CRRANGE:
                 case OP_CRMINRANGE:
                     minimize = (*stack.currentFrame->args.ecode == OP_CRMINRANGE);
-                    min = GET2(stack.currentFrame->args.ecode, 1);
-                    stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 3);
+                    min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
+                    stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 3);
                     if (stack.currentFrame->locals.max == 0)
                         stack.currentFrame->locals.max = INT_MAX;
                     stack.currentFrame->args.ecode += 5;
@@ -1106,7 +1100,7 @@ RECURSE:
                 
                 BEGIN_OPCODE(XCLASS):
                 stack.currentFrame->locals.data = stack.currentFrame->args.ecode + 1 + LINK_SIZE;                /* Save for matching */
-                stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode, 1);                      /* Advance past the item */
+                stack.currentFrame->args.ecode += getOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);                      /* Advance past the item */
                 
                 switch (*stack.currentFrame->args.ecode) {
                 case OP_CRSTAR:
@@ -1126,8 +1120,8 @@ RECURSE:
                 case OP_CRRANGE:
                 case OP_CRMINRANGE:
                     minimize = (*stack.currentFrame->args.ecode == OP_CRMINRANGE);
-                    min = GET2(stack.currentFrame->args.ecode, 1);
-                    stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 3);
+                    min = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
+                    stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 3);
                     if (stack.currentFrame->locals.max == 0)
                         stack.currentFrame->locals.max = INT_MAX;
                     stack.currentFrame->args.ecode += 5;
@@ -1276,7 +1270,7 @@ RECURSE:
                 /* Match a single character repeatedly; different opcodes share code. */
                 
                 BEGIN_OPCODE(EXACT):
-                min = stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 1);
+                min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 minimize = false;
                 stack.currentFrame->args.ecode += 3;
                 goto REPEATCHAR;
@@ -1284,7 +1278,7 @@ RECURSE:
                 BEGIN_OPCODE(UPTO):
                 BEGIN_OPCODE(MINUPTO):
                 min = 0;
-                stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 1);
+                stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 minimize = *stack.currentFrame->args.ecode == OP_MINUPTO;
                 stack.currentFrame->args.ecode += 3;
                 goto REPEATCHAR;
@@ -1435,7 +1429,7 @@ RECURSE:
                  about... */
                 
                 BEGIN_OPCODE(NOTEXACT):
-                min = stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 1);
+                min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 minimize = false;
                 stack.currentFrame->args.ecode += 3;
                 goto REPEATNOTCHAR;
@@ -1443,7 +1437,7 @@ RECURSE:
                 BEGIN_OPCODE(NOTUPTO):
                 BEGIN_OPCODE(NOTMINUPTO):
                 min = 0;
-                stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 1);
+                stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 minimize = *stack.currentFrame->args.ecode == OP_NOTMINUPTO;
                 stack.currentFrame->args.ecode += 3;
                 goto REPEATNOTCHAR;
@@ -1609,7 +1603,7 @@ RECURSE:
                  repeat it in the interests of efficiency. */
                 
                 BEGIN_OPCODE(TYPEEXACT):
-                min = stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 1);
+                min = stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 minimize = true;
                 stack.currentFrame->args.ecode += 3;
                 goto REPEATTYPE;
@@ -1617,7 +1611,7 @@ RECURSE:
                 BEGIN_OPCODE(TYPEUPTO):
                 BEGIN_OPCODE(TYPEMINUPTO):
                 min = 0;
-                stack.currentFrame->locals.max = GET2(stack.currentFrame->args.ecode, 1);
+                stack.currentFrame->locals.max = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                 minimize = *stack.currentFrame->args.ecode == OP_TYPEMINUPTO;
                 stack.currentFrame->args.ecode += 3;
                 goto REPEATTYPE;
@@ -1948,7 +1942,7 @@ RECURSE:
                  number from a dummy opcode at the start. */
                 
                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
-                    stack.currentFrame->locals.number = GET2(stack.currentFrame->args.ecode, 2+LINK_SIZE);
+                    stack.currentFrame->locals.number = get2ByteOpcodeValueAtOffset(stack.currentFrame->args.ecode, 2+LINK_SIZE);
                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
                 
 #ifdef DEBUG
@@ -1969,7 +1963,7 @@ RECURSE:
                         RMATCH(1, stack.currentFrame->args.ecode + 1 + LINK_SIZE, stack.currentFrame->args.eptrb, match_isgroup);
                         if (is_match)
                             RRETURN;
-                        stack.currentFrame->args.ecode += GET(stack.currentFrame->args.ecode, 1);
+                        stack.currentFrame->args.ecode += getOpcodeValueAtOffset(stack.currentFrame->args.ecode, 1);
                     } while (*stack.currentFrame->args.ecode == OP_ALT);
                     
                     DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
index f22a60f9360f3f86efe1d600db797e9528afad9b..627522e517123d348cc7d9521eddc26c83c841a4 100644 (file)
@@ -167,29 +167,29 @@ is automated on Unix systems via the "configure" command. */
 
 #if LINK_SIZE == 2
 
-static inline void PUT(uschar* a, size_t n, unsigned short d)
+static inline void putOpcodeValueAtOffset(uschar* opcodePtr, size_t offset, unsigned short value) // LINK_SIZE == 2 variant: writes value as two bytes, high byte first, starting at opcodePtr[offset].
 {
-    a[n] = d >> 8;
-    a[n+1] = d & 255;
+    opcodePtr[offset + 1] = value & 0xFF; // low-order byte goes second
+    opcodePtr[offset] = value >> 8; // high-order byte goes first
 }
 
-static inline short GET(const uschar* a, size_t n)
+static inline int getOpcodeValueAtOffset(const uschar* opcodePtr, size_t offset) // LINK_SIZE == 2 variant: reads the two bytes at opcodePtr[offset], high byte first. Returns int (as the LINK_SIZE 3/4 variants do) so values >= 0x8000 are not turned negative by a short return type.
 {
-    return ((a[n] << 8) | a[n + 1]);
+    return ((opcodePtr[offset] << 8) | opcodePtr[offset + 1]);
 }
 
 #define MAX_PATTERN_SIZE (1 << 16)
 
 #elif LINK_SIZE == 3
 
-static inline void PUT(uschar* a, size_t n, unsigned d)
+static inline void putOpcodeValueAtOffset(uschar* a, size_t n, unsigned d) // LINK_SIZE == 3 variant: stores d as three bytes, most significant first, at a[n] through a[n+2].
 {
     a[n] = d >> 16;
     a[n+1] = d >> 8;
     a[n+2] = d & 255;
 }
 
-static inline int GET(const uschar* a, size_t n)
+static inline int getOpcodeValueAtOffset(const uschar* a, size_t n) // LINK_SIZE == 3 variant: reassembles the three bytes at a[n] through a[n+2], most significant first.
 {
     return ((a[n] << 16) | (a[n+1] << 8) | a[n+2]);
 }
@@ -198,7 +198,7 @@ static inline int GET(const uschar* a, size_t n)
 
 #elif LINK_SIZE == 4
 
-static inline void PUT(uschar* a, size_t n, unsigned d)
+static inline void putOpcodeValueAtOffset(uschar* a, size_t n, unsigned d)
 {
     a[n] = d >> 24;
     a[n+1] = d >> 16;
@@ -206,7 +206,7 @@ static inline void PUT(uschar* a, size_t n, unsigned d)
     a[n+3] = d & 255;
 }
 
-static inline int GET(const uschar* a, size_t n)
+static inline int getOpcodeValueAtOffset(const uschar* a, size_t n) // LINK_SIZE == 4 variant: reassembles the four bytes at a[n] through a[n+3], most significant first. NOTE(review): a[n] << 24 looks like it can overflow int when a[n] >= 0x80 -- confirm stored values never set the top bit.
 {
     return ((a[n] << 24) | (a[n+1] << 16) | (a[n+2] << 8) | a[n+3]);
 }
@@ -217,30 +217,30 @@ static inline int GET(const uschar* a, size_t n)
 #error LINK_SIZE must be either 2, 3, or 4
 #endif
 
-
-/* Convenience macro defined in terms of the others */
-
-#define PUTINC(a,n,d)   PUT(a,n,d), a += LINK_SIZE
-
+static inline void putOpcodeValueAtOffsetAndAdvance(uschar*& a, size_t n, unsigned d) // Stores d with putOpcodeValueAtOffset, then moves the caller's pointer a forward by LINK_SIZE. d is unsigned (not unsigned short) so LINK_SIZE 3/4 values are not cut to 16 bits before being stored.
+{
+    putOpcodeValueAtOffset(a, n, d);
+    a += LINK_SIZE;
+}
 
 /* PCRE uses some other 2-byte quantities that do not change when the size of
 offsets changes. There are used for repeat counts and for other things such as
 capturing parenthesis numbers in back references. */
 
-static inline void PUT2(uschar* a, size_t n, unsigned short d)
+static inline void put2ByteOpcodeValueAtOffset(uschar* a, size_t n, unsigned short d) // Stores d as two bytes, high byte first, at a[n] and a[n+1]; always two bytes regardless of LINK_SIZE.
 {
     a[n] = d >> 8;
     a[n+1] = d & 255;
 }
 
-static inline short GET2(const uschar* a, size_t n)
+static inline int get2ByteOpcodeValueAtOffset(const uschar* a, size_t n) // Reads the two bytes at a[n] and a[n+1], high byte first. Returns int so the full 0..0xFFFF range survives; a short return type made values >= 0x8000 (e.g. large repeat counts) negative.
 {
     return ((a[n] << 8) | a[n + 1]);
 }
 
-static inline void PUT2INC(uschar*& a, size_t n, unsigned short d)
+static inline void put2ByteOpcodeValueAtOffsetAndAdvance(uschar*& a, size_t n, unsigned short d) // Writes d as two bytes at a[n] and a[n+1], then moves the caller's pointer a forward by 2.
 {
-    PUT2(a,n,d);
+    put2ByteOpcodeValueAtOffset(a, n, d); // a is taken by reference, so the += 2 below is visible to the caller
     a += 2;
 }
 
@@ -587,6 +587,17 @@ static inline bool isNewline(UChar nl)
     return (nl == 0xA || nl == 0xD || nl == 0x2028 || nl == 0x2029);
 }
 
+// FIXME: It's unclear to me if this moves the opcode ptr to the start of all branches
+// or to the end of all branches -- ecs
+// FIXME: This abstraction is poor since it assumes that you want to jump based on whatever
+// the next value in the stream is, and *then* follow any OP_ALT branches.
+static inline void moveOpcodePtrPastAnyAlternateBranches(const uschar*& opcodePtr) // Advances the caller's opcodePtr by the link value stored at offset 1, then keeps doing so while it lands on an OP_ALT opcode.
+{
+    opcodePtr += getOpcodeValueAtOffset(opcodePtr, 1); // the first jump is unconditional
+    while (*opcodePtr == OP_ALT)
+        opcodePtr += getOpcodeValueAtOffset(opcodePtr, 1); // follow each further OP_ALT link
+}
+
 #endif
 
 #endif
index cde777616038993737fb348d9efeaad7a900df06..fbc592cbc2244327a98404c22e1e39f8fb428789 100644 (file)
@@ -79,9 +79,7 @@ int _pcre_ucp_othercase(const unsigned c)
         if (c < (ucp_table[mid].f0 & f0_charmask))
             top = mid;
         else {
-            if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
-                c <= (ucp_table[mid].f0 & f0_charmask) +
-                (ucp_table[mid].f1 & f1_rangemask))
+            if ((ucp_table[mid].f0 & f0_rangeflag) && (c <= (ucp_table[mid].f0 & f0_charmask) + (ucp_table[mid].f1 & f1_rangemask)))
                 break;
             bot = mid + 1;
         }