Use the Accelerate framework to optimize FEColorMatrix operations
author simon.fraser@apple.com <simon.fraser@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 7 Apr 2017 00:01:29 +0000 (00:01 +0000)
committer simon.fraser@apple.com <simon.fraser@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 7 Apr 2017 00:01:29 +0000 (00:01 +0000)
https://bugs.webkit.org/show_bug.cgi?id=170518

Reviewed by Tim Horton.

On macOS and iOS, we can use the Accelerate framework (vImage) to do color matrix
math to optimize color matrix, hue rotate, saturation and luminance-to-alpha filters.

Change ImageBuffer::getUnmultipliedImageData() and getPremultipliedImageData() to
return the size of the returned Uint8ClampedArray in physical pixels, because we
need to pass that to vImage.

* html/canvas/CanvasRenderingContext2D.cpp:
(WebCore::CanvasRenderingContext2D::getImageData):
* platform/graphics/ImageBuffer.h:
* platform/graphics/cairo/ImageBufferCairo.cpp:
(WebCore::ImageBuffer::getUnmultipliedImageData):
(WebCore::ImageBuffer::getPremultipliedImageData):
* platform/graphics/cg/ImageBufferCG.cpp:
(WebCore::ImageBuffer::getUnmultipliedImageData):
(WebCore::ImageBuffer::getPremultipliedImageData):
* platform/graphics/filters/FEColorMatrix.cpp:
(WebCore::effectApplyAccelerated):
(WebCore::effectType):
(WebCore::FEColorMatrix::platformApplySoftware):
* platform/graphics/filters/FEDropShadow.cpp:
(WebCore::FEDropShadow::platformApplySoftware):
* platform/graphics/win/ImageBufferDirect2D.cpp:
(WebCore::ImageBuffer::getUnmultipliedImageData):
(WebCore::ImageBuffer::getPremultipliedImageData):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@215069 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/html/canvas/CanvasRenderingContext2D.cpp
Source/WebCore/platform/graphics/ImageBuffer.h
Source/WebCore/platform/graphics/cairo/ImageBufferCairo.cpp
Source/WebCore/platform/graphics/cg/ImageBufferCG.cpp
Source/WebCore/platform/graphics/filters/FEColorMatrix.cpp
Source/WebCore/platform/graphics/filters/FEDropShadow.cpp
Source/WebCore/platform/graphics/win/ImageBufferDirect2D.cpp

index 93226ac..483c036 100644 (file)
@@ -1,3 +1,36 @@
+2017-04-05  Simon Fraser  <simon.fraser@apple.com>
+
+        Use the Accelerate framework to optimize FEColorMatrix operations
+        https://bugs.webkit.org/show_bug.cgi?id=170518
+
+        Reviewed by Tim Horton.
+
+        On macOS and iOS, we can use the Accelerate framework (vImage) to do color matrix
+        math to optimize color matrix, hue rotate, saturation and luminance-to-alpha filters.
+        
+        Change ImageBuffer::getUnmultipliedImageData() and getPremultipliedImageData() to
+        return the size of the returned Uint8ClampedArray in physical pixels, because we
+        need to pass that to vImage.
+
+        * html/canvas/CanvasRenderingContext2D.cpp:
+        (WebCore::CanvasRenderingContext2D::getImageData):
+        * platform/graphics/ImageBuffer.h:
+        * platform/graphics/cairo/ImageBufferCairo.cpp:
+        (WebCore::ImageBuffer::getUnmultipliedImageData):
+        (WebCore::ImageBuffer::getPremultipliedImageData):
+        * platform/graphics/cg/ImageBufferCG.cpp:
+        (WebCore::ImageBuffer::getUnmultipliedImageData):
+        (WebCore::ImageBuffer::getPremultipliedImageData):
+        * platform/graphics/filters/FEColorMatrix.cpp:
+        (WebCore::effectApplyAccelerated):
+        (WebCore::effectType):
+        (WebCore::FEColorMatrix::platformApplySoftware):
+        * platform/graphics/filters/FEDropShadow.cpp:
+        (WebCore::FEDropShadow::platformApplySoftware):
+        * platform/graphics/win/ImageBufferDirect2D.cpp:
+        (WebCore::ImageBuffer::getUnmultipliedImageData):
+        (WebCore::ImageBuffer::getPremultipliedImageData):
+
 2017-04-04  Simon Fraser  <simon.fraser@apple.com>
 
         Do some minor FEColorMatrix code cleanup and optimization
index 63bfdfa..17fa8b5 100644 (file)
@@ -2008,7 +2008,7 @@ ExceptionOr<RefPtr<ImageData>> CanvasRenderingContext2D::getImageData(ImageBuffe
     if (!buffer)
         return createEmptyImageData(imageDataRect.size());
 
-    auto byteArray = buffer->getUnmultipliedImageData(imageDataRect, coordinateSystem);
+    auto byteArray = buffer->getUnmultipliedImageData(imageDataRect, nullptr, coordinateSystem);
     if (!byteArray) {
         StringBuilder consoleMessage;
         consoleMessage.appendLiteral("Unable to get image data from canvas. Requested size was ");
index 25ce884..8f89367 100644 (file)
@@ -104,8 +104,8 @@ public:
 
     enum CoordinateSystem { LogicalCoordinateSystem, BackingStoreCoordinateSystem };
 
-    RefPtr<Uint8ClampedArray> getUnmultipliedImageData(const IntRect&, CoordinateSystem = LogicalCoordinateSystem) const;
-    RefPtr<Uint8ClampedArray> getPremultipliedImageData(const IntRect&, CoordinateSystem = LogicalCoordinateSystem) const;
+    RefPtr<Uint8ClampedArray> getUnmultipliedImageData(const IntRect&, IntSize* pixelArrayDimensions = nullptr, CoordinateSystem = LogicalCoordinateSystem) const;
+    RefPtr<Uint8ClampedArray> getPremultipliedImageData(const IntRect&, IntSize* pixelArrayDimensions = nullptr, CoordinateSystem = LogicalCoordinateSystem) const;
 
     void putByteArray(Multiply multiplied, Uint8ClampedArray*, const IntSize& sourceSize, const IntRect& sourceRect, const IntPoint& destPoint, CoordinateSystem = LogicalCoordinateSystem);
     
index 74768e3..bc7b4de 100644 (file)
@@ -433,17 +433,21 @@ inline Unit backingStoreUnit(const Unit& value, ImageBuffer::CoordinateSystem co
     return result;
 }
 
-RefPtr<Uint8ClampedArray> ImageBuffer::getUnmultipliedImageData(const IntRect& rect, CoordinateSystem coordinateSystem) const
+RefPtr<Uint8ClampedArray> ImageBuffer::getUnmultipliedImageData(const IntRect& rect, IntSize* pixelArrayDimensions, CoordinateSystem coordinateSystem) const
 {
     IntRect logicalRect = logicalUnit(rect, coordinateSystem, m_resolutionScale);
     IntRect backingStoreRect = backingStoreUnit(rect, coordinateSystem, m_resolutionScale);
+    if (pixelArrayDimensions)
+        *pixelArrayDimensions = backingStoreRect.size();
     return getImageData<Unmultiplied>(backingStoreRect, logicalRect, m_data, m_size, m_logicalSize, m_resolutionScale);
 }
 
-RefPtr<Uint8ClampedArray> ImageBuffer::getPremultipliedImageData(const IntRect& rect, CoordinateSystem coordinateSystem) const
+RefPtr<Uint8ClampedArray> ImageBuffer::getPremultipliedImageData(const IntRect& rect, IntSize* pixelArrayDimensions, CoordinateSystem coordinateSystem) const
 {
     IntRect logicalRect = logicalUnit(rect, coordinateSystem, m_resolutionScale);
     IntRect backingStoreRect = backingStoreUnit(rect, coordinateSystem, m_resolutionScale);
+    if (pixelArrayDimensions)
+        *pixelArrayDimensions = backingStoreRect.size();
     return getImageData<Premultiplied>(backingStoreRect, logicalRect, m_data, m_size, m_logicalSize, m_resolutionScale);
 }
 
index fbbcf84..9d0427d 100644 (file)
@@ -382,7 +382,7 @@ void ImageBuffer::drawPattern(GraphicsContext& destContext, const FloatRect& des
     }
 }
 
-RefPtr<Uint8ClampedArray> ImageBuffer::getUnmultipliedImageData(const IntRect& rect, CoordinateSystem coordinateSystem) const
+RefPtr<Uint8ClampedArray> ImageBuffer::getUnmultipliedImageData(const IntRect& rect, IntSize* pixelArrayDimensions, CoordinateSystem coordinateSystem) const
 {
     if (context().isAcceleratedContext())
         flushContext();
@@ -391,10 +391,13 @@ RefPtr<Uint8ClampedArray> ImageBuffer::getUnmultipliedImageData(const IntRect& r
     if (coordinateSystem == LogicalCoordinateSystem)
         srcRect.scale(m_resolutionScale);
 
+    if (pixelArrayDimensions)
+        *pixelArrayDimensions = srcRect.size();
+
     return m_data.getData(srcRect, internalSize(), context().isAcceleratedContext(), true, 1);
 }
 
-RefPtr<Uint8ClampedArray> ImageBuffer::getPremultipliedImageData(const IntRect& rect, CoordinateSystem coordinateSystem) const
+RefPtr<Uint8ClampedArray> ImageBuffer::getPremultipliedImageData(const IntRect& rect, IntSize* pixelArrayDimensions, CoordinateSystem coordinateSystem) const
 {
     if (context().isAcceleratedContext())
         flushContext();
@@ -403,6 +406,9 @@ RefPtr<Uint8ClampedArray> ImageBuffer::getPremultipliedImageData(const IntRect&
     if (coordinateSystem == LogicalCoordinateSystem)
         srcRect.scale(m_resolutionScale);
 
+    if (pixelArrayDimensions)
+        *pixelArrayDimensions = srcRect.size();
+
     return m_data.getData(srcRect, internalSize(), context().isAcceleratedContext(), false, 1);
 }
 
index 50b2f14..ade1b8b 100644 (file)
 #include <runtime/Uint8ClampedArray.h>
 #include <wtf/MathExtras.h>
 
+#if USE(ACCELERATE)
+#include <Accelerate/Accelerate.h>
+#endif
+
 #define PRINT_FILTER_PERFORMANCE 0
 
 namespace WebCore {
@@ -104,8 +108,120 @@ inline void luminance(float& red, float& green, float& blue, float& alpha)
     blue = 0;
 }
 
+#if USE(ACCELERATE)
+template<ColorMatrixType filterType>
+bool effectApplyAccelerated(Uint8ClampedArray* pixelArray, const Vector<float>& values, float components[9], IntSize bufferSize)
+{
+    ASSERT(pixelArray->length() == bufferSize.area().unsafeGet() * 4);
+    
+    if (filterType == FECOLORMATRIX_TYPE_MATRIX) {
+        // vImageMatrixMultiply_ARGB8888 takes a 4x4 matrix, if any value in the last column of the FEColorMatrix 5x4 matrix
+        // is not zero, fall back to non-vImage code.
+        if (values[4] != 0 || values[9] != 0 || values[14] != 0 || values[19] != 0)
+            return false;
+    }
+
+    const int32_t divisor = 256;
+    uint8_t* data = pixelArray->data();
+
+    vImage_Buffer src;
+    src.width = bufferSize.width();
+    src.height = bufferSize.height();
+    src.rowBytes = bufferSize.width() * 4;
+    src.data = data;
+    
+    vImage_Buffer dest;
+    dest.width = bufferSize.width();
+    dest.height = bufferSize.height();
+    dest.rowBytes = bufferSize.width() * 4;
+    dest.data = data;
+
+    switch (filterType) {
+    case FECOLORMATRIX_TYPE_MATRIX: {
+        const int16_t matrix[4 * 4] = {
+            static_cast<int16_t>(roundf(values[ 0] * divisor)),
+            static_cast<int16_t>(roundf(values[ 5] * divisor)),
+            static_cast<int16_t>(roundf(values[10] * divisor)),
+            static_cast<int16_t>(roundf(values[15] * divisor)),
+
+            static_cast<int16_t>(roundf(values[ 1] * divisor)),
+            static_cast<int16_t>(roundf(values[ 6] * divisor)),
+            static_cast<int16_t>(roundf(values[11] * divisor)),
+            static_cast<int16_t>(roundf(values[16] * divisor)),
+
+            static_cast<int16_t>(roundf(values[ 2] * divisor)),
+            static_cast<int16_t>(roundf(values[ 7] * divisor)),
+            static_cast<int16_t>(roundf(values[12] * divisor)),
+            static_cast<int16_t>(roundf(values[17] * divisor)),
+
+            static_cast<int16_t>(roundf(values[ 3] * divisor)),
+            static_cast<int16_t>(roundf(values[ 8] * divisor)),
+            static_cast<int16_t>(roundf(values[13] * divisor)),
+            static_cast<int16_t>(roundf(values[18] * divisor)),
+        };
+        vImageMatrixMultiply_ARGB8888(&src, &dest, matrix, divisor, nullptr, nullptr, kvImageNoFlags);
+        break;
+    }
+
+    case FECOLORMATRIX_TYPE_SATURATE:
+    case FECOLORMATRIX_TYPE_HUEROTATE: {
+        const int16_t matrix[4 * 4] = {
+            static_cast<int16_t>(roundf(components[0] * divisor)),
+            static_cast<int16_t>(roundf(components[3] * divisor)),
+            static_cast<int16_t>(roundf(components[6] * divisor)),
+            0,
+
+            static_cast<int16_t>(roundf(components[1] * divisor)),
+            static_cast<int16_t>(roundf(components[4] * divisor)),
+            static_cast<int16_t>(roundf(components[7] * divisor)),
+            0,
+
+            static_cast<int16_t>(roundf(components[2] * divisor)),
+            static_cast<int16_t>(roundf(components[5] * divisor)),
+            static_cast<int16_t>(roundf(components[8] * divisor)),
+            0,
+
+            0,
+            0,
+            0,
+            divisor,
+        };
+        vImageMatrixMultiply_ARGB8888(&src, &dest, matrix, divisor, nullptr, nullptr, kvImageNoFlags);
+        break;
+    }
+    case FECOLORMATRIX_TYPE_LUMINANCETOALPHA: {
+        const int16_t matrix[4 * 4] = {
+            0,
+            0,
+            0,
+            static_cast<int16_t>(roundf(0.2125 * divisor)),
+
+            0,
+            0,
+            0,
+            static_cast<int16_t>(roundf(0.7154 * divisor)),
+
+            0,
+            0,
+            0,
+            static_cast<int16_t>(roundf(0.0721 * divisor)),
+
+            0,
+            0,
+            0,
+            0,
+        };
+        vImageMatrixMultiply_ARGB8888(&src, &dest, matrix, divisor, nullptr, nullptr, kvImageNoFlags);
+        break;
+    }
+    }
+    
+    return true;
+}
+#endif
+
 template<ColorMatrixType filterType>
-void effectType(Uint8ClampedArray* pixelArray, const Vector<float>& values)
+void effectType(Uint8ClampedArray* pixelArray, const Vector<float>& values, IntSize bufferSize)
 {
     float components[9];
 
@@ -116,6 +232,13 @@ void effectType(Uint8ClampedArray* pixelArray, const Vector<float>& values)
 
     unsigned pixelArrayLength = pixelArray->length();
 
+#if USE(ACCELERATE)
+    if (effectApplyAccelerated<filterType>(pixelArray, values, components, bufferSize))
+        return;
+#else
+    UNUSED_PARAM(bufferSize);
+#endif
+
     switch (filterType) {
     case FECOLORMATRIX_TYPE_MATRIX:
         for (unsigned pixelByteOffset = 0; pixelByteOffset < pixelArrayLength; pixelByteOffset += 4) {
@@ -179,23 +302,24 @@ void FEColorMatrix::platformApplySoftware()
         resultImage->context().drawImageBuffer(*inBuffer, drawingRegionOfInputImage(in->absolutePaintRect()));
 
     IntRect imageRect(IntPoint(), resultImage->logicalSize());
-    RefPtr<Uint8ClampedArray> pixelArray = resultImage->getUnmultipliedImageData(imageRect);
+    IntSize pixelArrayDimensions;
+    RefPtr<Uint8ClampedArray> pixelArray = resultImage->getUnmultipliedImageData(imageRect, &pixelArrayDimensions);
     Vector<float> values = normalizedFloats(m_values);
 
     switch (m_type) {
     case FECOLORMATRIX_TYPE_UNKNOWN:
         break;
     case FECOLORMATRIX_TYPE_MATRIX:
-        effectType<FECOLORMATRIX_TYPE_MATRIX>(pixelArray.get(), values);
+        effectType<FECOLORMATRIX_TYPE_MATRIX>(pixelArray.get(), values, pixelArrayDimensions);
         break;
     case FECOLORMATRIX_TYPE_SATURATE: 
-        effectType<FECOLORMATRIX_TYPE_SATURATE>(pixelArray.get(), values);
+        effectType<FECOLORMATRIX_TYPE_SATURATE>(pixelArray.get(), values, pixelArrayDimensions);
         break;
     case FECOLORMATRIX_TYPE_HUEROTATE:
-        effectType<FECOLORMATRIX_TYPE_HUEROTATE>(pixelArray.get(), values);
+        effectType<FECOLORMATRIX_TYPE_HUEROTATE>(pixelArray.get(), values, pixelArrayDimensions);
         break;
     case FECOLORMATRIX_TYPE_LUMINANCETOALPHA:
-        effectType<FECOLORMATRIX_TYPE_LUMINANCETOALPHA>(pixelArray.get(), values);
+        effectType<FECOLORMATRIX_TYPE_LUMINANCETOALPHA>(pixelArray.get(), values, pixelArrayDimensions);
         setIsAlphaImage(true);
         break;
     }
index fa0be80..6f1d3db 100644 (file)
@@ -99,7 +99,7 @@ void FEDropShadow::platformApplySoftware()
 
     // TODO: Direct pixel access to ImageBuffer would avoid copying the ImageData.
     IntRect shadowArea(IntPoint(), resultImage->internalSize());
-    RefPtr<Uint8ClampedArray> srcPixelArray = resultImage->getPremultipliedImageData(shadowArea, ImageBuffer::BackingStoreCoordinateSystem);
+    RefPtr<Uint8ClampedArray> srcPixelArray = resultImage->getPremultipliedImageData(shadowArea, nullptr, ImageBuffer::BackingStoreCoordinateSystem);
 
     contextShadow.blurLayerImage(srcPixelArray->data(), shadowArea.size(), 4 * shadowArea.size().width());
 
index 99d860c..701ee64 100644 (file)
@@ -215,7 +215,7 @@ void ImageBuffer::drawPattern(GraphicsContext& destContext, const FloatRect& des
     }
 }
 
-RefPtr<Uint8ClampedArray> ImageBuffer::getUnmultipliedImageData(const IntRect& rect, CoordinateSystem coordinateSystem) const
+RefPtr<Uint8ClampedArray> ImageBuffer::getUnmultipliedImageData(const IntRect& rect, IntSize* pixelArrayDimensions, CoordinateSystem coordinateSystem) const
 {
     if (context().isAcceleratedContext())
         flushContext();
@@ -224,10 +224,13 @@ RefPtr<Uint8ClampedArray> ImageBuffer::getUnmultipliedImageData(const IntRect& r
     if (coordinateSystem == LogicalCoordinateSystem)
         srcRect.scale(m_resolutionScale);
 
+    if (pixelArrayDimensions)
+        *pixelArrayDimensions = srcRect.size();
+
     return m_data.getData(srcRect, internalSize(), context().isAcceleratedContext(), true, 1);
 }
 
-RefPtr<Uint8ClampedArray> ImageBuffer::getPremultipliedImageData(const IntRect& rect, CoordinateSystem coordinateSystem) const
+RefPtr<Uint8ClampedArray> ImageBuffer::getPremultipliedImageData(const IntRect& rect, IntSize* pixelArrayDimensions, CoordinateSystem coordinateSystem) const
 {
     if (context().isAcceleratedContext())
         flushContext();
@@ -236,6 +239,9 @@ RefPtr<Uint8ClampedArray> ImageBuffer::getPremultipliedImageData(const IntRect&
     if (coordinateSystem == LogicalCoordinateSystem)
         srcRect.scale(m_resolutionScale);
 
+    if (pixelArrayDimensions)
+        *pixelArrayDimensions = srcRect.size();
+
     return m_data.getData(srcRect, internalSize(), context().isAcceleratedContext(), false, 1);
 }