From 11d59389e178a87c0acc7c6ac8798fa99aa1790f Mon Sep 17 00:00:00 2001 From: "dino@apple.com" Date: Tue, 9 Dec 2014 01:31:37 +0000 Subject: [PATCH] [Apple] Use Accelerate framework to speed-up FEGaussianBlur https://bugs.webkit.org/show_bug.cgi?id=139310 PerformanceTests: Reviewed by Simon Fraser. Add an interactive performance test that measures the speed of a set of blur operations on generated images. * Interactive/blur-filter-timing.html: Added. Source/WebCore: Reviewed by Simon Fraser. Using Apple's Accelerate framework provides faster blurs than the parallel jobs approach, especially since r168577 which started performing retina-accurate filters. Using Accelerate.framework to replace the existing box blur (what we use to approximate Gaussian blurs) gets about a 20% speedup on desktop class machines, but a 2x-6x speedup on iOS hardware. Obviously this depends on the size of the content being blurred, but it is still good. The change is to intercept the platformApply function on FEGaussianBlur and send it off to Accelerate. There is an interactive performance test: PerformanceTests/Interactive/blur-filter-timing.html * platform/graphics/filters/FEGaussianBlur.cpp: (WebCore::kernelPosition): Move this to a file static function from the .h. (WebCore::accelerateBoxBlur): The Accelerate implementation. (WebCore::standardBoxBlur): The default generic/standard implementation. (WebCore::FEGaussianBlur::platformApplyGeneric): Use accelerate or the default form. (WebCore::FEGaussianBlur::platformApply): Don't try the parallelJobs approach if Accelerate is available. * platform/graphics/filters/FEGaussianBlur.h: (WebCore::FEGaussianBlur::kernelPosition): Deleted. Move into the .cpp. Source/WTF: Reviewed by Simon Fraser. Add a HAVE_ACCELERATE flag, true on Apple platforms. 
* wtf/Platform.h: git-svn-id: https://svn.webkit.org/repository/webkit/trunk@177000 268f45cc-cd09-0410-ab3c-d52691b4dbfc --- PerformanceTests/ChangeLog | 12 ++ .../Interactive/blur-filter-timing.html | 93 ++++++++++++++++ Source/WTF/ChangeLog | 12 ++ Source/WTF/wtf/Platform.h | 4 + Source/WebCore/ChangeLog | 32 ++++++ .../graphics/filters/FEGaussianBlur.cpp | 103 ++++++++++++++++-- .../graphics/filters/FEGaussianBlur.h | 29 ----- 7 files changed, 246 insertions(+), 39 deletions(-) create mode 100644 PerformanceTests/Interactive/blur-filter-timing.html diff --git a/PerformanceTests/ChangeLog b/PerformanceTests/ChangeLog index 159772a35183..879e37a35484 100644 --- a/PerformanceTests/ChangeLog +++ b/PerformanceTests/ChangeLog @@ -1,3 +1,15 @@ +2014-12-08 Dean Jackson + + [Apple] Use Accelerate framework to speed-up FEGaussianBlur + https://bugs.webkit.org/show_bug.cgi?id=139310 + + Reviewed by Simon Fraser. + + Add an interactive performance test that measures the speed of a set + of blur operations on generated images. + + * Interactive/blur-filter-timing.html: Added. + 2014-11-13 Zalan Bujtas Simple line layout: Add performance test case to measure line layout speed of monolithic text content. diff --git a/PerformanceTests/Interactive/blur-filter-timing.html b/PerformanceTests/Interactive/blur-filter-timing.html new file mode 100644 index 000000000000..2300dc01995f --- /dev/null +++ b/PerformanceTests/Interactive/blur-filter-timing.html @@ -0,0 +1,93 @@ + + + + Timing test for blur filter + + + + + +

+ +

+ + diff --git a/Source/WTF/ChangeLog b/Source/WTF/ChangeLog index 7edd71829223..cc30c050d265 100644 --- a/Source/WTF/ChangeLog +++ b/Source/WTF/ChangeLog @@ -1,3 +1,15 @@ +2014-12-08 Dean Jackson + + [Apple] Use Accelerate framework to speed-up FEGaussianBlur + https://bugs.webkit.org/show_bug.cgi?id=139310 + + + Reviewed by Simon Fraser. + + Add a HAVE_ACCELERATE flag, true on Apple platforms. + + * wtf/Platform.h: + 2014-12-08 Myles C. Maxfield Fix iOS build after r176971. diff --git a/Source/WTF/wtf/Platform.h b/Source/WTF/wtf/Platform.h index 8638dab9bdab..18c86a28b37b 100644 --- a/Source/WTF/wtf/Platform.h +++ b/Source/WTF/wtf/Platform.h @@ -1092,4 +1092,8 @@ #define WTF_USE_MEDIATOOLBOX 1 #endif +#if PLATFORM(COCOA) +#define HAVE_ACCELERATE 1 +#endif + #endif /* WTF_Platform_h */ diff --git a/Source/WebCore/ChangeLog b/Source/WebCore/ChangeLog index e6be1e0e39f3..f9142abe34ef 100644 --- a/Source/WebCore/ChangeLog +++ b/Source/WebCore/ChangeLog @@ -1,3 +1,35 @@ +2014-12-08 Dean Jackson + + [Apple] Use Accelerate framework to speed-up FEGaussianBlur + https://bugs.webkit.org/show_bug.cgi?id=139310 + + + Reviewed by Simon Fraser. + + Using Apple's Accelerate framework provides faster blurs + than the parallel jobs approach, especially since r168577 + which started performing retina-accurate filters. + + Using Accelerate.framework to replace the existing box blur (what + we use to approximate Gaussian blurs) gets about a 20% speedup on + desktop class machines, but a 2x-6x speedup on iOS hardware. + Obviously this depends on the size of the content being blurred, + but it is still good. + + The change is to intercept the platformApply function on + FEGaussianBlur and send it off to Accelerate. + + There is an interactive performance test: PerformanceTests/Interactive/blur-filter-timing.html + + * platform/graphics/filters/FEGaussianBlur.cpp: + (WebCore::kernelPosition): Move this to a file static function from the .h. 
+ (WebCore::accelerateBoxBlur): The Accelerate implementation. + (WebCore::standardBoxBlur): The default generic/standard implementation. + (WebCore::FEGaussianBlur::platformApplyGeneric): Use accelerate or the default form. + (WebCore::FEGaussianBlur::platformApply): Don't try the parallelJobs approach if Accelerate is available. + * platform/graphics/filters/FEGaussianBlur.h: + (WebCore::FEGaussianBlur::kernelPosition): Deleted. Move into the .cpp. + 2014-12-08 Beth Dakin Copy and Lookup menu items should be disabled when something is not copyable diff --git a/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp b/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp index e2eefae98676..347c43965707 100644 --- a/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp +++ b/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp @@ -30,6 +30,10 @@ #include "GraphicsContext.h" #include "TextStream.h" +#if HAVE(ACCELERATE) +#include +#endif + #include #include #include @@ -45,6 +49,34 @@ static const int gMaxKernelSize = 500; namespace WebCore { +inline void kernelPosition(int blurIteration, unsigned& radius, int& deltaLeft, int& deltaRight) +{ + // Check http://www.w3.org/TR/SVG/filters.html#feGaussianBlurElement for details. 
+ switch (blurIteration) { + case 0: + if (!(radius % 2)) { + deltaLeft = radius / 2 - 1; + deltaRight = radius - deltaLeft; + } else { + deltaLeft = radius / 2; + deltaRight = radius - deltaLeft; + } + break; + case 1: + if (!(radius % 2)) { + deltaLeft++; + deltaRight--; + } + break; + case 2: + if (!(radius % 2)) { + deltaRight++; + radius++; + } + break; + } +} + FEGaussianBlur::FEGaussianBlur(Filter* filter, float x, float y, EdgeModeType edgeMode) : FilterEffect(filter) , m_stdX(x) @@ -262,15 +294,51 @@ inline void boxBlur(const Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* d } } -inline void FEGaussianBlur::platformApplyGeneric(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize) +#if HAVE(ACCELERATE) +inline void accelerateBoxBlur(const Uint8ClampedArray* src, Uint8ClampedArray* dst, unsigned kernelSize, int stride, int effectWidth, int effectHeight) +{ + // We must always use an odd radius. + if (kernelSize % 2 != 1) + kernelSize += 1; + + vImage_Buffer effectInBuffer; + effectInBuffer.data = src->data(); + effectInBuffer.width = effectWidth; + effectInBuffer.height = effectHeight; + effectInBuffer.rowBytes = stride; + + vImage_Buffer effectOutBuffer; + effectOutBuffer.data = dst->data(); + effectOutBuffer.width = effectWidth; + effectOutBuffer.height = effectHeight; + effectOutBuffer.rowBytes = stride; + + // Determine the size of a temporary buffer by calling the function first with a special flag. vImage will return + // the size needed, or an error (which are all negative). 
+ size_t tmpBufferSize = vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, 0, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend | kvImageGetTempBufferSize); + if (tmpBufferSize <= 0) + return; + + void* tmpBuffer = fastMalloc(tmpBufferSize); + vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend); + vImageBoxConvolve_ARGB8888(&effectOutBuffer, &effectInBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend); + vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend); + WTF::fastFree(tmpBuffer); + + // The final result should be stored in src. + if (dst == src) { + ASSERT(src->length() == dst->length()); + memcpy(dst->data(), src->data(), src->length()); + } +} +#endif + +inline void standardBoxBlur(Uint8ClampedArray* src, Uint8ClampedArray* dst, unsigned kernelSizeX, unsigned kernelSizeY, int stride, IntSize& paintSize, bool isAlphaImage, EdgeModeType edgeMode) { - int stride = 4 * paintSize.width(); int dxLeft = 0; int dxRight = 0; int dyLeft = 0; int dyRight = 0; - Uint8ClampedArray* src = srcPixelArray; - Uint8ClampedArray* dst = tmpPixelArray; for (int i = 0; i < 3; ++i) { if (kernelSizeX) { @@ -279,9 +347,9 @@ inline void FEGaussianBlur::platformApplyGeneric(Uint8ClampedArray* srcPixelArra if (!isAlphaImage()) boxBlurNEON(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height()); else - boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), true, m_edgeMode); + boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), true, edgeMode); #else - boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), isAlphaImage(), m_edgeMode); + boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), isAlphaImage, 
edgeMode); #endif std::swap(src, dst); } @@ -292,20 +360,33 @@ inline void FEGaussianBlur::platformApplyGeneric(Uint8ClampedArray* srcPixelArra if (!isAlphaImage()) boxBlurNEON(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width()); else - boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), true, m_edgeMode); + boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), true, edgeMode); #else - boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), isAlphaImage(), m_edgeMode); + boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), isAlphaImage, edgeMode); #endif std::swap(src, dst); } } - // The final result should be stored in srcPixelArray. - if (dst == srcPixelArray) { + // The final result should be stored in src. + if (dst == src) { ASSERT(src->length() == dst->length()); memcpy(dst->data(), src->data(), src->length()); } +} + +inline void FEGaussianBlur::platformApplyGeneric(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize) +{ + int stride = 4 * paintSize.width(); + +#if HAVE(ACCELERATE) + if (kernelSizeX == kernelSizeY && (m_edgeMode == EDGEMODE_NONE || m_edgeMode == EDGEMODE_DUPLICATE)) { + accelerateBoxBlur(srcPixelArray, tmpPixelArray, kernelSizeX, stride, paintSize.width(), paintSize.height()); + return; + } +#endif + standardBoxBlur(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, stride, paintSize, isAlphaImage(), m_edgeMode); } void FEGaussianBlur::platformApplyWorker(PlatformApplyParameters* parameters) @@ -317,6 +398,7 @@ void FEGaussianBlur::platformApplyWorker(PlatformApplyParameters* parameters) inline void FEGaussianBlur::platformApply(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& 
paintSize) { +#if !HAVE(ACCELERATE) int scanline = 4 * paintSize.width(); int extraHeight = 3 * kernelSizeY * 0.5f; int optimalThreadNumber = (paintSize.width() * paintSize.height()) / (s_minimalRectDimension + extraHeight * paintSize.width()); @@ -378,6 +460,7 @@ inline void FEGaussianBlur::platformApply(Uint8ClampedArray* srcPixelArray, Uint } // Fallback to single threaded mode. } +#endif // The selection here eventually should happen dynamically on some platforms. platformApplyGeneric(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize); diff --git a/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h b/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h index cba3da0cc437..7af542939c67 100644 --- a/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h +++ b/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h @@ -70,7 +70,6 @@ private: FEGaussianBlur(Filter*, float, float, EdgeModeType); - static inline void kernelPosition(int boxBlur, unsigned& std, int& dLeft, int& dRight); inline void platformApply(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize); inline void platformApplyGeneric(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize); @@ -80,34 +79,6 @@ private: EdgeModeType m_edgeMode; }; -inline void FEGaussianBlur::kernelPosition(int boxBlur, unsigned& std, int& dLeft, int& dRight) -{ - // check http://www.w3.org/TR/SVG/filters.html#feGaussianBlurElement for details - switch (boxBlur) { - case 0: - if (!(std % 2)) { - dLeft = std / 2 - 1; - dRight = std - dLeft; - } else { - dLeft = std / 2; - dRight = std - dLeft; - } - break; - case 1: - if (!(std % 2)) { - dLeft++; - dRight--; - } - break; - case 2: - if (!(std % 2)) { - dRight++; - std++; - } - break; - } -} - } // namespace WebCore #endif // FEGaussianBlur_h -- 2.36.0