Share and improve extraction of character for operator and token elements
authorfred.wang@free.fr <fred.wang@free.fr@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 23 Aug 2016 13:36:26 +0000 (13:36 +0000)
committerfred.wang@free.fr <fred.wang@free.fr@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 23 Aug 2016 13:36:26 +0000 (13:36 +0000)
https://bugs.webkit.org/show_bug.cgi?id=160462

Patch by Frederic Wang <fwang@igalia.com> on 2016-08-03
Reviewed by Darin Adler.

No new tests, already covered by existing tests.

* mathml/MathMLElement.cpp:
(WebCore::MathMLElement::stripLeadingAndTrailingWhitespace): Make this a protected member of
MathMLElement so that it can be used in MathMLTokenElement.
(WebCore::skipLeadingAndTrailingWhitespace): Deleted.
* mathml/MathMLElement.h: Declare stripLeadingAndTrailingWhitespace.
* mathml/MathMLOperatorElement.cpp:
(WebCore::MathMLOperatorElement::parseOperatorChar): Use convertToSingleCodePoint to extract
a code point more efficiently. For now, we continue to only handle BMP characters.
* mathml/MathMLTokenElement.cpp:
(WebCore::MathMLTokenElement::convertToSingleCodePoint): Helper function to try and convert a
string to a single code point after having removed leading and trailing space.
* mathml/MathMLTokenElement.h: Declare convertToSingleCodePoint.
* rendering/mathml/RenderMathMLToken.cpp:
(WebCore::RenderMathMLToken::updateMathVariantGlyph): Use convertToSingleCodePoint to extract
a code point more efficiently.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@204830 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/mathml/MathMLElement.cpp
Source/WebCore/mathml/MathMLElement.h
Source/WebCore/mathml/MathMLOperatorElement.cpp
Source/WebCore/mathml/MathMLTokenElement.cpp
Source/WebCore/mathml/MathMLTokenElement.h
Source/WebCore/rendering/mathml/RenderMathMLToken.cpp

index 77fb1fb..296a273 100644 (file)
@@ -1,3 +1,28 @@
+2016-08-03  Frederic Wang  <fwang@igalia.com>
+
+        Share and improve extraction of character for operator and token elements
+        https://bugs.webkit.org/show_bug.cgi?id=160462
+
+        Reviewed by Darin Adler.
+
+        No new tests, already covered by existing tests.
+
+        * mathml/MathMLElement.cpp:
+        (WebCore::MathMLElement::stripLeadingAndTrailingWhitespace): Make this a protected member of
+        MathMLElement so that it can be used in MathMLTokenElement.
+        (WebCore::skipLeadingAndTrailingWhitespace): Deleted.
+        * mathml/MathMLElement.h: Declare stripLeadingAndTrailingWhitespace.
+        * mathml/MathMLOperatorElement.cpp:
+        (WebCore::MathMLOperatorElement::parseOperatorChar): Use convertToSingleCodePoint to extract
+        a code point more efficiently. For now, we continue to only handle BMP characters.
+        * mathml/MathMLTokenElement.cpp:
+        (WebCore::MathMLTokenElement::convertToSingleCodePoint): Helper function to try and convert a
+        string to a single code point after having removed leading and trailing space.
+        * mathml/MathMLTokenElement.h: Declare convertToSingleCodePoint.
+        * rendering/mathml/RenderMathMLToken.cpp:
+        (WebCore::RenderMathMLToken::updateMathVariantGlyph): Use convertToSingleCodePoint to extract
+        a code point more efficiently.
+
 2016-08-23  Frederic Wang  <fwang@igalia.com>
 
         Rename MathMLInlineContainerElement to MathMLPresentationElement
index c3caa02..844f7b1 100644 (file)
@@ -365,7 +365,7 @@ int MathMLElement::tabIndex() const
     return Element::tabIndex();
 }
 
-static inline StringView skipLeadingAndTrailingWhitespace(const StringView& stringView)
+StringView MathMLElement::stripLeadingAndTrailingWhitespace(const StringView& stringView)
 {
     unsigned start = 0, stringLength = stringView.length();
     while (stringLength > 0 && isHTMLSpace(stringView[start])) {
@@ -470,7 +470,7 @@ MathMLElement::Length MathMLElement::parseMathMLLength(const String& string)
     // Instead, we just use isHTMLSpace and toFloat to parse these parts.
 
     // We first skip whitespace from both ends of the string.
-    StringView stringView = skipLeadingAndTrailingWhitespace(string);
+    StringView stringView = stripLeadingAndTrailingWhitespace(string);
 
     if (stringView.isEmpty())
         return Length();
index 2713774..bb95339 100644 (file)
@@ -93,6 +93,8 @@ public:
 protected:
     MathMLElement(const QualifiedName& tagName, Document&);
 
+    static StringView stripLeadingAndTrailingWhitespace(const StringView&);
+
     void parseAttribute(const QualifiedName&, const AtomicString&) override;
     bool childShouldCreateRenderer(const Node&) const override;
 
index 45fc783..81c17da 100644 (file)
@@ -50,16 +50,18 @@ Ref<MathMLOperatorElement> MathMLOperatorElement::create(const QualifiedName& ta
 MathMLOperatorElement::OperatorChar MathMLOperatorElement::parseOperatorChar(const String& string)
 {
     OperatorChar operatorChar;
-
-    // We collapse the whitespace and replace the hyphens by minus signs.
-    AtomicString textContent = string.stripWhiteSpace().simplifyWhiteSpace().replace(hyphenMinus, minusSign).impl();
-
-    // We verify whether the operator text can be represented by a single UChar.
-    // FIXME: This is a really inefficient way to extract a character from a string (https://webkit.org/b/160241#c7).
-    // FIXME: This does not handle surrogate pairs (https://webkit.org/b/122296).
-    // FIXME: This does not handle <mo> operators with multiple characters (https://webkit.org/b/124828).
-    operatorChar.character = textContent.length() == 1 ? textContent[0] : 0;
-    operatorChar.isVertical = MathMLOperatorDictionary::isVertical(operatorChar.character);
+    // FIXME: This operator dictionary does not accept multiple characters (https://webkit.org/b/124828).
+    if (auto codePoint = convertToSingleCodePoint(string)) {
+        // FIXME: MathMLOperatorDictionary/RenderMathMLOperator/MathOperator do not support non-BMP characters (https://webkit.org/b/122296).
+        if (U_IS_BMP(codePoint.value())) {
+            UChar character = codePoint.value();
+            // The minus sign renders better than the hyphen sign used in some MathML formulas.
+            if (character == hyphenMinus)
+                character = minusSign;
+            operatorChar.character = character;
+            operatorChar.isVertical = MathMLOperatorDictionary::isVertical(operatorChar.character);
+        }
+    }
     return operatorChar;
 }
 
index b07054d..f658396 100644 (file)
@@ -88,6 +88,17 @@ bool MathMLTokenElement::childShouldCreateRenderer(const Node& child) const
     return isPhrasingContent(child) && StyledElement::childShouldCreateRenderer(child);
 }
 
+Optional<UChar32> MathMLTokenElement::convertToSingleCodePoint(StringView string)
+{
+    auto codePoints = stripLeadingAndTrailingWhitespace(string).codePoints();
+    auto iterator = codePoints.begin();
+    if (iterator == codePoints.end())
+        return Nullopt;
+    Optional<UChar32> character = *iterator;
+    ++iterator;
+    return iterator == codePoints.end() ? character : Nullopt;
+}
+
 }
 
 #endif // ENABLE(MATHML)
index b9b67e2..ea9c0f4 100644 (file)
@@ -37,6 +37,8 @@ class MathMLTokenElement : public MathMLElement {
 public:
     static Ref<MathMLTokenElement> create(const QualifiedName& tagName, Document&);
 
+    static Optional<UChar32> convertToSingleCodePoint(StringView);
+
 protected:
     MathMLTokenElement(const QualifiedName& tagName, Document&);
     void childrenChanged(const ChildChange&) override;
index bfed295..17e928e 100644 (file)
@@ -527,14 +527,12 @@ void RenderMathMLToken::updateMathVariantGlyph()
     }
 
     const auto& tokenElement = element();
-    AtomicString textContent = element().textContent().stripWhiteSpace().simplifyWhiteSpace();
-    if (textContent.length() == 1) {
-        UChar32 codePoint = textContent[0];
+    if (auto codePoint = MathMLTokenElement::convertToSingleCodePoint(element().textContent())) {
         MathMLElement::MathVariant mathvariant = mathMLStyle()->mathVariant();
         if (mathvariant == MathMLElement::MathVariant::None)
             mathvariant = tokenElement.hasTagName(MathMLNames::miTag) ? MathMLElement::MathVariant::Italic : MathMLElement::MathVariant::Normal;
-        UChar32 transformedCodePoint = mathVariant(codePoint, mathvariant);
-        if (transformedCodePoint != codePoint)
+        UChar32 transformedCodePoint = mathVariant(codePoint.value(), mathvariant);
+        if (transformedCodePoint != codePoint.value())
             m_mathVariantGlyph = style().fontCascade().glyphDataForCharacter(transformedCodePoint, !style().isLeftToRightDirection());
     }
 }