+2007-11-14 Eric Seidel <eric@webkit.org>
+
+ Reviewed by Geoff.
+
+ Another round of PCRE cleanups: inlines
+
+ SunSpider claims that this, combined with my previous PCRE cleanup, was a 0.7% speedup, go figure.
+
+ * pcre/pcre_compile.cpp:
+ (jsRegExpCompile):
+ * pcre/pcre_exec.cpp:
+ (match):
+ (jsRegExpExecute):
+ * pcre/pcre_internal.h:
+ (PUT):
+ (GET):
+ (PUT2):
+ (GET2):
+ (isNewline):
+
2007-11-14 Eric Seidel <eric@webkit.org>
Reviewed by Sam.
return 0;
}
-pcre *
-jsRegExpCompile(const pcre_char* pattern, int patternLength,
+pcre* jsRegExpCompile(const pcre_char* pattern, int patternLength,
JSRegExpIgnoreCaseOption ignoreCase, JSRegExpMultilineOption multiline,
unsigned* numSubpatterns, const char** errorptr)
{
/* We can't pass back an error message if errorptr is NULL; I guess the best we
- can do is just return NULL, but we can set a code value if there is a code
- pointer. */
+ can do is just return NULL, but we can set a code value if there is a code pointer. */
if (!errorptr)
return 0;
-
*errorptr = NULL;
- /* Set up pointers to the individual character tables */
-
compile_data compile_block;
ErrorCode errorcode = ERR0;
if (length > MAX_PATTERN_SIZE)
return returnError(ERR16, errorptr);
- /* Compute the size of data block needed and get it. */
-
size_t size = length + sizeof(real_pcre);
real_pcre* re = reinterpret_cast<real_pcre*>(new char[size]);
/* Start of subject, or after internal newline if multiline. */
BEGIN_OPCODE(CIRC):
- if (frame->eptr != md->start_subject && (!md->multiline || !IS_NEWLINE(frame->eptr[-1])))
+ if (frame->eptr != md->start_subject && (!md->multiline || !isNewline(frame->eptr[-1])))
RRETURN_NO_MATCH;
frame->ecode++;
NEXT_OPCODE;
/* End of subject, or before internal newline if multiline. */
BEGIN_OPCODE(DOLL):
- if (frame->eptr < md->end_subject && (!md->multiline || !IS_NEWLINE(*frame->eptr)))
+ if (frame->eptr < md->end_subject && (!md->multiline || !isNewline(*frame->eptr)))
RRETURN_NO_MATCH;
frame->ecode++;
NEXT_OPCODE;
/* Match a single character type; inline for speed */
BEGIN_OPCODE(ANY):
- if (frame->eptr < md->end_subject && IS_NEWLINE(*frame->eptr))
+ if (frame->eptr < md->end_subject && isNewline(*frame->eptr))
RRETURN_NO_MATCH;
if (frame->eptr++ >= md->end_subject) RRETURN_NO_MATCH;
while (frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr)) frame->eptr++;
case OP_ANY:
for (i = 1; i <= min; i++)
{
- if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr))
+ if (frame->eptr >= md->end_subject || isNewline(*frame->eptr))
RRETURN_NO_MATCH;
++frame->eptr;
while (frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr)) frame->eptr++;
switch(frame->ctype)
{
case OP_ANY:
- if (IS_NEWLINE(c)) RRETURN;
+ if (isNewline(c)) RRETURN;
break;
case OP_NOT_DIGIT:
{
for (i = min; i < frame->max; i++)
{
- if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
+ if (frame->eptr >= md->end_subject || isNewline(*frame->eptr)) break;
frame->eptr++;
while (frame->eptr < md->end_subject && (*frame->eptr & 0xc0) == 0x80) frame->eptr++;
}
{
for (i = min; i < frame->max; i++)
{
- if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
+ if (frame->eptr >= md->end_subject || isNewline(*frame->eptr)) break;
frame->eptr++;
}
break;
{
if (start_match > match_block.start_subject + start_offset)
{
- while (start_match < end_subject && !IS_NEWLINE(start_match[-1]))
+ while (start_match < end_subject && !isNewline(start_match[-1]))
start_match++;
}
}
#if LINK_SIZE == 2
-#define PUT(a,n,d) \
- (a[n] = (d) >> 8), \
- (a[(n)+1] = (d) & 255)
+/* Writes the link value d into the two consecutive elements starting at a[n],
+   high-order part first (a[n] receives d >> 8, a[n + 1] receives d & 255). */
+static inline void PUT(uschar* a, size_t n, unsigned short d)
+{
+    uschar* dest = a + n;
+    dest[0] = d >> 8;
+    dest[1] = d & 255;
+}
-#define GET(a,n) \
- (((a)[n] << 8) | (a)[(n)+1])
+/* Reads back the link value stored at a[n] (high-order part) and a[n + 1]
+   (low-order part).
+   The result is returned as int, matching the type the former GET macro
+   produced. Returning short would turn any combined value of 0x8000 or more
+   into a negative number on platforms where short is 16 bits, and
+   MAX_PATTERN_SIZE for this LINK_SIZE is 1 << 16, so such values are possible
+   (assuming uschar is an 8-bit unsigned type -- TODO confirm). */
+static inline int GET(const uschar* a, size_t n)
+{
+    return (a[n] << 8) | a[n + 1];
+}
#define MAX_PATTERN_SIZE (1 << 16)
-
#elif LINK_SIZE == 3
-#define PUT(a,n,d) \
- (a[n] = (d) >> 16), \
- (a[(n)+1] = (d) >> 8), \
- (a[(n)+2] = (d) & 255)
+/* Writes the link value d into the three consecutive elements starting at
+   a[n], high-order part first: d >> 16, then d >> 8, then d & 255. The first
+   two stores rely on the implicit conversion to uschar to drop higher bits. */
+static inline void PUT(uschar* a, size_t n, unsigned d)
+{
+    uschar* dest = a + n;
+    dest[0] = d >> 16;
+    dest[1] = d >> 8;
+    dest[2] = d & 255;
+}
-#define GET(a,n) \
- (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
+/* Reads back the link value stored in the three consecutive elements starting
+   at a[n], treating a[n] as the high-order part. */
+static inline int GET(const uschar* a, size_t n)
+{
+    const uschar* src = a + n;
+    return (src[0] << 16) | (src[1] << 8) | src[2];
+}
#define MAX_PATTERN_SIZE (1 << 24)
-
#elif LINK_SIZE == 4
-#define PUT(a,n,d) \
- (a[n] = (d) >> 24), \
- (a[(n)+1] = (d) >> 16), \
- (a[(n)+2] = (d) >> 8), \
- (a[(n)+3] = (d) & 255)
+/* Writes the link value d into the four consecutive elements starting at
+   a[n], high-order part first: d >> 24, d >> 16, d >> 8, then d & 255. The
+   middle stores rely on the implicit conversion to uschar to drop higher bits. */
+static inline void PUT(uschar* a, size_t n, unsigned d)
+{
+    uschar* dest = a + n;
+    dest[0] = d >> 24;
+    dest[1] = d >> 16;
+    dest[2] = d >> 8;
+    dest[3] = d & 255;
+}
-#define GET(a,n) \
- (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
+/* Reads back the link value stored in the four consecutive elements starting
+   at a[n], treating a[n] as the high-order part.
+   The value is assembled in unsigned arithmetic: shifting a promoted (signed
+   int) element left by 24 overflows when that element is 0x80 or larger, which
+   is undefined behavior. MAX_PATTERN_SIZE (1 << 30) is meant to keep stored
+   values positive, so this only matters for unexpected data. */
+static inline int GET(const uschar* a, size_t n)
+{
+    const uschar* src = a + n;
+    const unsigned value = (static_cast<unsigned>(src[0]) << 24)
+        | (static_cast<unsigned>(src[1]) << 16)
+        | (static_cast<unsigned>(src[2]) << 8)
+        | static_cast<unsigned>(src[3]);
+    return static_cast<int>(value);
+}
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
-
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif
offsets changes. These are used for repeat counts and for other things such as
capturing parenthesis numbers in back references. */
-#define PUT2(a,n,d) \
- a[n] = (d) >> 8; \
- a[(n)+1] = (d) & 255
+/* Writes the 2-element value d starting at a[n], high-order part first
+   (a[n] receives d >> 8, a[n + 1] receives d & 255). Unlike PUT, the width
+   does not depend on LINK_SIZE. */
+static inline void PUT2(uschar* a, size_t n, unsigned short d)
+{
+    uschar* dest = a + n;
+    dest[0] = d >> 8;
+    dest[1] = d & 255;
+}
-#define GET2(a,n) \
- (((a)[n] << 8) | (a)[(n)+1])
+/* Reads back the value stored at a[n] (high-order part) and a[n + 1]
+   (low-order part).
+   The result is returned as int, matching the type the former GET2 macro
+   produced. Returning short would turn any stored value of 0x8000 or more into
+   a negative number on platforms where short is 16 bits (assuming uschar is an
+   8-bit unsigned type -- TODO confirm). */
+static inline int GET2(const uschar* a, size_t n)
+{
+    return (a[n] << 8) | a[n + 1];
+}
+// FIXME: This can't be a static inline function yet, because some callers pass it a
+// register variable, and you can't take the address of a register.
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2
-
/* When UTF-8 encoding is being used, a character is no longer just a single
byte. The macros for character handling generate simple sequences when used in
byte-mode, and more complicated ones for UTF-8 characters. */
extern int _pcre_ucp_othercase(const int);
extern BOOL _pcre_xclass(int, const uschar *);
-#define IS_NEWLINE(nl) ((nl) == 0xA || (nl) == 0xD || (nl) == 0x2028 || (nl) == 0x2029)
+/* Returns true when nl is one of the four line-terminator code units:
+   0xA (line feed), 0xD (carriage return), 0x2028 (line separator) or
+   0x2029 (paragraph separator).
+   NOTE(review): the argument is converted to pcre_uchar before the comparison;
+   confirm that no caller passes a value wider than pcre_uchar. */
+static inline bool isNewline(pcre_uchar nl)
+{
+    const bool isAsciiTerminator = nl == 0xA || nl == 0xD;
+    const bool isUnicodeSeparator = nl == 0x2028 || nl == 0x2029;
+    return isAsciiTerminator || isUnicodeSeparator;
+}
#endif