Make PNGImageDecoder::rowAvailable auto-vectorizable
author allan.jensen@digia.com <allan.jensen@digia.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 17 May 2013 12:41:28 +0000 (12:41 +0000)
committer allan.jensen@digia.com <allan.jensen@digia.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 17 May 2013 12:41:28 +0000 (12:41 +0000)
https://bugs.webkit.org/show_bug.cgi?id=116151

Reviewed by Benjamin Poulain.

Changed the main loops in PNGImageDecoder::rowAvailable so that they
avoid branches and non-sequential table lookups.

Together with automatic vectorization by the compiler, this provides around
a 4x speed-up with AVX or a 2x speed-up on generic x64, shaving 12-40% off
PNG decoding time in general.

* platform/graphics/Color.cpp:
(WebCore::premultipliedARGBFromColor):
* platform/graphics/Color.h:
(WebCore::fastDivideBy255):
* platform/graphics/filters/FEBlend.cpp:
* platform/image-decoders/png/PNGImageDecoder.cpp:
(WebCore::setPixelRGB):
(WebCore::setPixelRGBA):
(WebCore::setPixelPremultipliedRGBA):
(WebCore::PNGImageDecoder::rowAvailable):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@150252 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/platform/graphics/Color.cpp
Source/WebCore/platform/graphics/Color.h
Source/WebCore/platform/graphics/filters/FEBlend.cpp
Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp

index d9cc4ba..d4613db 100644 (file)
@@ -1,3 +1,28 @@
+2013-05-17  Allan Sandfeld Jensen  <allan.jensen@digia.com>
+
+        Make PNGImageDecoder::rowAvailable auto-vectorizable
+        https://bugs.webkit.org/show_bug.cgi?id=116151
+
+        Reviewed by Benjamin Poulain.
+
+        Changed the main loops under PNGImageDecoder::rowAvailable so that they
+        avoid branches and non-sequential table look ups.
+
+        Together with automatic vectorization by the compiler this provides around
+        4x speed-up with AVX or 2x speed-up on generic x64. Shaving off 12-40% on
+        PNG decoding in general.
+
+        * platform/graphics/Color.cpp:
+        (WebCore::premultipliedARGBFromColor):
+        * platform/graphics/Color.h:
+        (WebCore::fastDivideBy255):
+        * platform/graphics/filters/FEBlend.cpp:
+        * platform/image-decoders/png/PNGImageDecoder.cpp:
+        (WebCore::setPixelRGB):
+        (WebCore::setPixelRGBA):
+        (WebCore::setPixelRGBA_Premultiplied):
+        (WebCore::PNGImageDecoder::rowAvailable):
+
 2013-05-17  Joseph Pecoraro  <pecoraro@apple.com>
 
         Web Inspector: Allow "//# sourceMappingURL" syntax alongside "//@"
 2013-05-17  Joseph Pecoraro  <pecoraro@apple.com>
 
         Web Inspector: Allow "//# sourceMappingURL" syntax alongside "//@"
index 04dc73e..51c7100 100644 (file)
@@ -434,9 +434,9 @@ RGBA32 premultipliedARGBFromColor(const Color& color)
     unsigned alpha = color.alpha();
     if (alpha < 255) {
         pixelColor = Color::createUnchecked(
     unsigned alpha = color.alpha();
     if (alpha < 255) {
         pixelColor = Color::createUnchecked(
-            (color.red() * alpha  + 254) / 255,
-            (color.green() * alpha  + 254) / 255,
-            (color.blue() * alpha  + 254) / 255,
+            fastDivideBy255(color.red() * alpha + 254),
+            fastDivideBy255(color.green() * alpha + 254),
+            fastDivideBy255(color.blue() * alpha + 254),
             alpha).rgb();
     } else
          pixelColor = color.rgb();
             alpha).rgb();
     } else
          pixelColor = color.rgb();
index 31b9f4a..8fe100a 100644 (file)
@@ -202,6 +202,14 @@ inline Color blend(const Color& from, const Color& to, double progress, bool ble
                  blend(from.alpha(), to.alpha(), progress));
 }
 
                  blend(from.alpha(), to.alpha(), progress));
 }
 
+inline uint16_t fastDivideBy255(uint16_t value)
+{
+    // This is an approximate algorithm for division by 255, but it gives accurate results for 16bit values.
+    uint16_t approximation = value >> 8;
+    uint16_t remainder = value - (approximation * 255) + 1;
+    return approximation + (remainder >> 8);
+}
+
 #if USE(CG)
 CGColorRef cachedCGColor(const Color&, ColorSpace);
 #endif
 #if USE(CG)
 CGColorRef cachedCGColor(const Color&, ColorSpace);
 #endif
index d5897c3..bf527ff 100644 (file)
@@ -63,14 +63,6 @@ bool FEBlend::setBlendMode(BlendModeType mode)
     return true;
 }
 
     return true;
 }
 
-static inline unsigned char fastDivideBy255(uint16_t value)
-{
-    // This is an approximate algorithm for division by 255, but it gives accurate results for 16bit values.
-    uint16_t quotient = value >> 8;
-    uint16_t remainder = value - (quotient * 255) + 1;
-    return quotient + (remainder >> 8);
-}
-
 inline unsigned char feBlendNormal(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char)
 {
     return fastDivideBy255((255 - alphaA) * colorB + colorA * 255);
 inline unsigned char feBlendNormal(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char)
 {
     return fastDivideBy255((255 - alphaA) * colorB + colorA * 255);
index 7984c9a..776acce 100644 (file)
@@ -40,6 +40,7 @@
 #include "config.h"
 #include "PNGImageDecoder.h"
 
 #include "config.h"
 #include "PNGImageDecoder.h"
 
+#include "Color.h"
 #include "PlatformInstrumentation.h"
 #include "png.h"
 #include <wtf/OwnArrayPtr.h>
 #include "PlatformInstrumentation.h"
 #include "png.h"
 #include <wtf/OwnArrayPtr.h>
@@ -402,6 +403,29 @@ void PNGImageDecoder::headerAvailable()
     }
 }
 
     }
 }
 
+static inline void setPixelRGB(ImageFrame::PixelData* dest, png_bytep pixel)
+{
+    *dest = 0xFF000000U | pixel[0] << 16 | pixel[1] << 8 | pixel[2];
+}
+
+static inline void setPixelRGBA(ImageFrame::PixelData* dest, png_bytep pixel, unsigned char& nonTrivialAlphaMask)
+{
+    unsigned char a = pixel[3];
+    *dest = a << 24 | pixel[0] << 16 | pixel[1] << 8 | pixel[2];
+    nonTrivialAlphaMask |= (255 - a);
+}
+
+static inline void setPixelPremultipliedRGBA(ImageFrame::PixelData* dest, png_bytep pixel, unsigned char& nonTrivialAlphaMask)
+{
+    unsigned char a = pixel[3];
+    unsigned char r = fastDivideBy255(pixel[0] * a);
+    unsigned char g = fastDivideBy255(pixel[1] * a);
+    unsigned char b = fastDivideBy255(pixel[2] * a);
+
+    *dest = a << 24 | r << 16 | g << 8 | b;
+    nonTrivialAlphaMask |= (255 - a);
+}
+
 void PNGImageDecoder::rowAvailable(unsigned char* rowBuffer, unsigned rowIndex, int)
 {
     if (m_frameBufferCache.isEmpty())
 void PNGImageDecoder::rowAvailable(unsigned char* rowBuffer, unsigned rowIndex, int)
 {
     if (m_frameBufferCache.isEmpty())
@@ -501,27 +525,37 @@ void PNGImageDecoder::rowAvailable(unsigned char* rowBuffer, unsigned rowIndex,
     // Write the decoded row pixels to the frame buffer.
     ImageFrame::PixelData* address = buffer.getAddr(0, y);
     int width = scaledSize().width();
     // Write the decoded row pixels to the frame buffer.
     ImageFrame::PixelData* address = buffer.getAddr(0, y);
     int width = scaledSize().width();
-    bool nonTrivialAlpha = false;
+    unsigned char nonTrivialAlphaMask = 0;
 
 #if ENABLE(IMAGE_DECODER_DOWN_SAMPLING)
 
 #if ENABLE(IMAGE_DECODER_DOWN_SAMPLING)
-    for (int x = 0; x < width; ++x) {
-        png_bytep pixel = row + (m_scaled ? m_scaledColumns[x] : x) * colorChannels;
-        unsigned alpha = hasAlpha ? pixel[3] : 255;
-        buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha);
-        nonTrivialAlpha |= alpha < 255;
-    }
-#else
-    ASSERT(!m_scaled);
-    png_bytep pixel = row;
-    for (int x = 0; x < width; ++x, pixel += colorChannels) {
-        unsigned alpha = hasAlpha ? pixel[3] : 255;
-        buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha);
-        nonTrivialAlpha |= alpha < 255;
-    }
+    if (m_scaled) {
+        for (int x = 0; x < width; ++x) {
+            png_bytep pixel = row + m_scaledColumns[x] * colorChannels;
+            unsigned alpha = hasAlpha ? pixel[3] : 255;
+            buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha);
+            nonTrivialAlphaMask |= (255 - alpha);
+        }
+    } else
 #endif
 #endif
+    {
+        png_bytep pixel = row;
+        if (hasAlpha) {
+            if (buffer.premultiplyAlpha()) {
+                for (int x = 0; x < width; ++x, pixel += 4)
+                    setPixelPremultipliedRGBA(address++, pixel, nonTrivialAlphaMask);
+            } else {
+                for (int x = 0; x < width; ++x, pixel += 4)
+                    setPixelRGBA(address++, pixel, nonTrivialAlphaMask);
+            }
+        } else {
+            for (int x = 0; x < width; ++x, pixel += 3)
+                setPixelRGB(address++, pixel);
+        }
+    }
+
 
 
-    if (nonTrivialAlpha && !buffer.hasAlpha())
-        buffer.setHasAlpha(nonTrivialAlpha);
+    if (nonTrivialAlphaMask && !buffer.hasAlpha())
+        buffer.setHasAlpha(true);
 }
 
 void PNGImageDecoder::pngComplete()
 }
 
 void PNGImageDecoder::pngComplete()