[JSC] There should be a debug option to dump a compiled RegExp Pattern
author msaboff@apple.com <msaboff@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 18 Jul 2017 16:25:42 +0000 (16:25 +0000)
committer msaboff@apple.com <msaboff@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 18 Jul 2017 16:25:42 +0000 (16:25 +0000)
https://bugs.webkit.org/show_bug.cgi?id=174601

Reviewed by Alex Christensen.

Added the debug option dumpCompiledRegExpPatterns which will dump the YarrPattern and related
objects after a regular expression has been compiled.

* runtime/Options.h:
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::indentForNestingLevel):
(JSC::Yarr::dumpUChar32):
(JSC::Yarr::PatternAlternative::dump):
(JSC::Yarr::PatternTerm::dumpQuantifier):
(JSC::Yarr::PatternTerm::dump):
(JSC::Yarr::PatternDisjunction::dump):
(JSC::Yarr::YarrPattern::dumpPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::YarrPattern::global):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@219611 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/runtime/Options.h
Source/JavaScriptCore/yarr/YarrPattern.cpp
Source/JavaScriptCore/yarr/YarrPattern.h

index 0a6584a..9797931 100644 (file)
@@ -1,3 +1,26 @@
+2017-07-18  Michael Saboff  <msaboff@apple.com>
+
+        [JSC] There should be a debug option to dump a compiled RegExp Pattern
+        https://bugs.webkit.org/show_bug.cgi?id=174601
+
+        Reviewed by Alex Christensen.
+
+        Added the debug option dumpCompiledRegExpPatterns which will dump the YarrPattern and related
+        objects after a regular expression has been compiled.
+
+        * runtime/Options.h:
+        * yarr/YarrPattern.cpp:
+        (JSC::Yarr::YarrPattern::compile):
+        (JSC::Yarr::indentForNestingLevel):
+        (JSC::Yarr::dumpUChar32):
+        (JSC::Yarr::PatternAlternative::dump):
+        (JSC::Yarr::PatternTerm::dumpQuantifier):
+        (JSC::Yarr::PatternTerm::dump):
+        (JSC::Yarr::PatternDisjunction::dump):
+        (JSC::Yarr::YarrPattern::dumpPattern):
+        * yarr/YarrPattern.h:
+        (JSC::Yarr::YarrPattern::global):
+
 2017-07-17  Darin Adler  <darin@apple.com>
 
         Improve use of NeverDestroyed
index a321ffe..6737f75 100644 (file)
@@ -432,6 +432,8 @@ typedef const char* optionString;
     \
     v(unsigned, prototypeHitCountForLLIntCaching, 2, Normal, "Number of prototype property hits before caching a prototype in the LLInt. A count of 0 means never cache.") \
     \
+    v(bool, dumpCompiledRegExpPatterns, false, Normal, nullptr) \
+    \
     v(bool, dumpModuleRecord, false, Normal, nullptr) \
     v(bool, dumpModuleLoadingState, false, Normal, nullptr) \
     v(bool, exposeInternalModuleLoader, false, Normal, "expose the internal module loader object to the global space for debugging") \
index 1c2dd75..3a11090 100644 (file)
 #include "config.h"
 #include "YarrPattern.h"
 
+#include "Options.h"
 #include "Yarr.h"
 #include "YarrCanonicalize.h"
 #include "YarrParser.h"
+#include <wtf/DataLog.h>
 #include <wtf/Vector.h>
 #include <wtf/WTFThreadData.h>
 
@@ -958,6 +960,9 @@ const char* YarrPattern::compile(const String& patternString, void* stackLimit)
     if (const char* error = constructor.setupOffsets())
         return error;
 
+    if (Options::dumpCompiledRegExpPatterns())
+        dumpPattern(patternString);
+
     return nullptr;
 }
 
@@ -983,4 +988,281 @@ YarrPattern::YarrPattern(const String& pattern, RegExpFlags flags, const char**
     *error = compile(pattern, stackLimit);
 }
 
+// Writes the indentation for one line of the pattern dump: a fixed four-space
+// margin followed by two additional spaces for each level of nesting.
+static void indentForNestingLevel(PrintStream& out, unsigned nestingDepth)
+{
+    out.print("    ");
+    for (unsigned level = 0; level < nestingDepth; ++level)
+        out.print("  ");
+}
+
+// Prints a single code point. Printable ASCII (' ' through '~') is written as
+// a quoted character; every other value is written in hexadecimal so that
+// control characters and non-ASCII code points never reach the log as raw,
+// possibly unprintable bytes.
+static void dumpUChar32(PrintStream& out, UChar32 c)
+{
+    if (c >= ' ' && c <= '~')
+        out.printf("'%c'", static_cast<char>(c));
+    else
+        out.printf("0x%04x", static_cast<unsigned>(c));
+}
+
+// Dumps this alternative: a one-line summary (minimum size plus any of the
+// fixed-size / once-through / BOL attributes that are set), followed by each
+// of its terms at the given nesting depth.
+void PatternAlternative::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth)
+{
+    out.print("minimum size: ", m_minimumSize);
+
+    if (m_hasFixedSize)
+        out.print(",fixed size");
+
+    if (m_onceThrough)
+        out.print(",once through");
+
+    if (m_startsWithBOL)
+        out.print(",starts with ^");
+
+    if (m_containsBOL)
+        out.print(",contains ^");
+
+    out.println();
+
+    for (PatternTerm& term : m_terms)
+        term.dump(out, thisPattern, nestingDepth);
+}
+
+// Appends this term's quantifier as " {min}" or " {min,max}", followed by
+// " greedy" or " non-greedy" where applicable. A fixed count of exactly one
+// prints nothing at all.
+void PatternTerm::dumpQuantifier(PrintStream& out)
+{
+    const bool matchesExactlyOnce = quantityType == QuantifierFixedCount && quantityMinCount == 1 && quantityMaxCount == 1;
+    if (matchesExactlyOnce)
+        return;
+
+    out.print(" {", quantityMinCount.unsafeGet());
+    if (quantityMinCount != quantityMaxCount) {
+        // A maximum of UINT_MAX is shown as "..." instead of as a number.
+        if (quantityMaxCount == UINT_MAX)
+            out.print(",...");
+        else
+            out.print(",", quantityMaxCount.unsafeGet());
+    }
+    out.print("}");
+
+    switch (quantityType) {
+    case QuantifierGreedy:
+        out.print(" greedy");
+        break;
+    case QuantifierNonGreedy:
+        out.print(" non-greedy");
+        break;
+    default:
+        break;
+    }
+}
+
+// Dumps a description of this term to |out|, indented for |nestingDepth|.
+// Simple terms produce a single line; parenthesized terms (subpatterns and
+// assertions) additionally dump their nested disjunction one level deeper.
+// |thisPattern| supplies the ignore-case/unicode flags and the pattern's
+// built-in character classes that this term's class is compared against.
+void PatternTerm::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth)
+{
+    indentForNestingLevel(out, nestingDepth);
+
+    // Parenthesized terms report inversion themselves (as "inverted ") below.
+    if (invert() && (type != TypeParenthesesSubpattern && type != TypeParentheticalAssertion))
+        out.print("not ");
+
+    switch (type) {
+    case TypeAssertionBOL:
+        out.println("BOL");
+        break;
+    case TypeAssertionEOL:
+        out.println("EOL");
+        break;
+    case TypeAssertionWordBoundary:
+        out.println("word boundary");
+        break;
+    case TypePatternCharacter:
+        out.print("character ");
+        // For ignore-case patterns show both cases of an ASCII letter.
+        if (thisPattern->ignoreCase() && isASCIIAlpha(patternCharacter)) {
+            dumpUChar32(out, toASCIIUpper(patternCharacter));
+            out.print("/");
+            dumpUChar32(out, toASCIILower(patternCharacter));
+        } else
+            dumpUChar32(out, patternCharacter);
+        dumpQuantifier(out);
+        if (quantityType != QuantifierFixedCount)
+            out.print(",frame location ", frameLocation);
+        out.println();
+        break;
+    case TypeCharacterClass:
+        out.print("character class ");
+        // Name the class when it is one of the pattern's built-in classes;
+        // otherwise list its individual matches and ranges.
+        if (characterClass == thisPattern->newlineCharacterClass())
+            out.print("<newline>");
+        else if (characterClass == thisPattern->digitsCharacterClass())
+            out.print("<digits>");
+        else if (characterClass == thisPattern->spacesCharacterClass())
+            out.print("<whitespace>");
+        else if (characterClass == thisPattern->wordcharCharacterClass())
+            out.print("<word>");
+        else if (characterClass == thisPattern->wordUnicodeIgnoreCaseCharCharacterClass())
+            out.print("<unicode ignore case>");
+        else if (characterClass == thisPattern->nondigitsCharacterClass())
+            out.print("<non-digits>");
+        else if (characterClass == thisPattern->nonspacesCharacterClass())
+            out.print("<non-whitespace>");
+        else if (characterClass == thisPattern->nonwordcharCharacterClass())
+            out.print("<non-word>");
+        else if (characterClass == thisPattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
+            out.print("<unicode non-ignore case>");
+        else {
+            // Shared by both helpers so that every non-empty group after the
+            // first is preceded by a comma.
+            bool needMatchesRangesSeparator = false;
+
+            // Prints "<prefix>:(c1,c2,...)" for a non-empty list of code points.
+            // The vector is taken by const reference to avoid copying it.
+            auto dumpMatches = [&] (const char* prefix, const Vector<UChar32>& matches) {
+                size_t matchesSize = matches.size();
+                if (matchesSize) {
+                    if (needMatchesRangesSeparator)
+                        out.print(",");
+                    needMatchesRangesSeparator = true;
+
+                    out.print(prefix, ":(");
+                    for (size_t i = 0; i < matchesSize; ++i) {
+                        if (i)
+                            out.print(",");
+                        dumpUChar32(out, matches[i]);
+                    }
+                    out.print(")");
+                }
+            };
+
+            // Prints "<prefix>ranges:((b1..e1),(b2..e2),...)" for a non-empty
+            // list of ranges. The vector is taken by const reference to avoid
+            // copying it.
+            auto dumpRanges = [&] (const char* prefix, const Vector<CharacterRange>& ranges) {
+                size_t rangeSize = ranges.size();
+                if (rangeSize) {
+                    if (needMatchesRangesSeparator)
+                        out.print(",");
+                    needMatchesRangesSeparator = true;
+
+                    out.print(prefix, "ranges:(");
+                    for (size_t i = 0; i < rangeSize; ++i) {
+                        if (i)
+                            out.print(",");
+                        const CharacterRange& range = ranges[i];
+                        out.print("(");
+                        dumpUChar32(out, range.begin);
+                        out.print("..");
+                        dumpUChar32(out, range.end);
+                        out.print(")");
+                    }
+                    out.print(")");
+                }
+            };
+
+            out.print("[");
+            dumpMatches("ASCII", characterClass->m_matches);
+            dumpRanges("ASCII", characterClass->m_ranges);
+            dumpMatches("Unicode", characterClass->m_matchesUnicode);
+            dumpRanges("Unicode", characterClass->m_rangesUnicode);
+            out.print("]");
+        }
+        dumpQuantifier(out);
+        if (quantityType != QuantifierFixedCount || thisPattern->unicode())
+            out.print(",frame location ", frameLocation);
+        out.println();
+        break;
+    case TypeBackReference:
+        out.print("back reference to subpattern #", backReferenceSubpatternId);
+        out.println(",frame location ", frameLocation);
+        break;
+    case TypeForwardReference:
+        out.println("forward reference");
+        break;
+    case TypeParenthesesSubpattern:
+        if (m_capture)
+            out.print("captured ");
+        else
+            out.print("non-captured ");
+
+        // The remainder of the output is shared with parenthetical assertions.
+        FALLTHROUGH;
+    case TypeParentheticalAssertion:
+        if (m_invert)
+            out.print("inverted ");
+
+        if (type == TypeParenthesesSubpattern)
+            out.print("subpattern");
+        else if (type == TypeParentheticalAssertion)
+            out.print("assertion");
+
+        if (m_capture)
+            out.print(" #", parentheses.subpatternId);
+
+        dumpQuantifier(out);
+
+        if (parentheses.isCopy)
+            out.print(",copy");
+
+        if (parentheses.isTerminal)
+            out.print(",terminal");
+
+        if (quantityMaxCount != 1 || parentheses.isCopy || quantityType != QuantifierFixedCount)
+            out.println(",frame location ", frameLocation);
+        else
+            out.println();
+
+        // With more than one alternative, print a header line for the list
+        // before the nested disjunction itself.
+        if (parentheses.disjunction->m_alternatives.size() > 1) {
+            indentForNestingLevel(out, nestingDepth + 1);
+            unsigned alternativeFrameLocation = frameLocation;
+            if (quantityType != QuantifierFixedCount)
+                alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+            out.println("alternative list,frame location ", alternativeFrameLocation);
+        }
+
+        parentheses.disjunction->dump(out, thisPattern, nestingDepth + 1);
+        break;
+    case TypeDotStarEnclosure:
+        out.println(".* enclosure,frame location ", thisPattern->m_initialStartValueFrameLocation);
+        break;
+    }
+}
+
+// Dumps every alternative of this disjunction. When there is more than one,
+// each is labelled "alternative #N: " and its terms are indented one level
+// deeper than the disjunction itself.
+// Note: the default for |nestingDepth| is supplied on this definition, so the
+// two-argument call form is only usable after this point in this file.
+void PatternDisjunction::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth = 0)
+{
+    const unsigned alternativeCount = m_alternatives.size();
+    const bool hasMultipleAlternatives = alternativeCount > 1;
+    const unsigned termNestingDepth = nestingDepth + (hasMultipleAlternatives ? 1 : 0);
+
+    for (unsigned index = 0; index < alternativeCount; ++index) {
+        indentForNestingLevel(out, nestingDepth);
+        if (hasMultipleAlternatives)
+            out.print("alternative #", index, ": ");
+        m_alternatives[index]->dump(out, thisPattern, termNestingDepth);
+    }
+}
+
+// Convenience overload: dumps the compiled pattern to WTF::dataFile().
+void YarrPattern::dumpPattern(const String& patternString)
+{
+    PrintStream& out = WTF::dataFile();
+    dumpPattern(out, patternString);
+}
+
+// Dumps the compiled pattern to |out|. The header line shows the source
+// pattern as /pattern/flags; when any flag is set it is followed by the flag
+// names spelled out and joined with '|'. The pattern body is dumped after it.
+void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
+{
+    struct FlagInfo {
+        bool isSet;
+        const char* letter;
+        const char* description;
+    };
+    // Listed in the order in which the flags are printed.
+    const FlagInfo flagInfos[] = {
+        { global(), "g", "global" },
+        { ignoreCase(), "i", "ignore case" },
+        { multiline(), "m", "multiline" },
+        { unicode(), "u", "unicode" },
+        { sticky(), "y", "sticky" },
+    };
+
+    out.print("RegExp pattern for /");
+    out.print(patternString);
+    out.print("/");
+    for (const FlagInfo& flag : flagInfos) {
+        if (flag.isSet)
+            out.print(flag.letter);
+    }
+
+    if (m_flags != NoFlags) {
+        bool needsSeparator = false;
+        out.print(" (");
+        for (const FlagInfo& flag : flagInfos) {
+            if (!flag.isSet)
+                continue;
+            if (needsSeparator)
+                out.print("|");
+            out.print(flag.description);
+            needsSeparator = true;
+        }
+        out.print(")");
+    }
+
+    out.print(":\n");
+    m_body->dump(out, this);
+}
+
 } }
index bb1b779..cad61ac 100644 (file)
 
 #include "RegExpKey.h"
 #include <wtf/CheckedArithmetic.h>
+#include <wtf/PrintStream.h>
 #include <wtf/Vector.h>
 #include <wtf/text/WTFString.h>
 
 namespace JSC { namespace Yarr {
 
+struct YarrPattern;
 struct PatternDisjunction;
 
 struct CharacterRange {
@@ -222,6 +224,9 @@ struct PatternTerm {
         quantityMaxCount = maxCount;
         quantityType = type;
     }
+
+    void dumpQuantifier(PrintStream&);
+    void dump(PrintStream&, YarrPattern*, unsigned);
 };
 
 struct PatternAlternative {
@@ -258,6 +263,8 @@ public:
         return m_onceThrough;
     }
 
+    void dump(PrintStream&, YarrPattern*, unsigned);
+
     Vector<PatternTerm> m_terms;
     PatternDisjunction* m_parent;
     unsigned m_minimumSize;
@@ -282,6 +289,8 @@ public:
         return static_cast<PatternAlternative*>(m_alternatives.last().get());
     }
 
+    void dump(PrintStream&, YarrPattern*, unsigned);
+
     Vector<std::unique_ptr<PatternAlternative>> m_alternatives;
     PatternAlternative* m_parent;
     unsigned m_minimumSize;
@@ -448,6 +457,10 @@ struct YarrPattern {
         return nonwordUnicodeIgnoreCasecharCached;
     }
 
+    void dumpPattern(const String& pattern);
+    void dumpPattern(PrintStream& out, const String& pattern);
+
+    bool global() const { return m_flags & FlagGlobal; }
     bool ignoreCase() const { return m_flags & FlagIgnoreCase; }
     bool multiline() const { return m_flags & FlagMultiline; }
     bool sticky() const { return m_flags & FlagSticky; }