NEONizing forceValidPreMultipliedPixels
author zherczeg@webkit.org <zherczeg@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 15 May 2012 13:18:14 +0000 (13:18 +0000)
committer zherczeg@webkit.org <zherczeg@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 15 May 2012 13:18:14 +0000 (13:18 +0000)
https://bugs.webkit.org/show_bug.cgi?id=86468

Reviewed by Nikolas Zimmermann.

Source/WebCore:

Optimize forceValidPreMultipliedPixels with ARM-NEON intrinsics.

Existing tests cover this feature.

* platform/graphics/filters/FilterEffect.cpp:
(WebCore::FilterEffect::forceValidPreMultipliedPixels):

Source/WTF:

Allow disabling all NEON intrinsics with a single macro.

* wtf/Platform.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@117058 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WTF/ChangeLog
Source/WTF/wtf/Platform.h
Source/WebCore/ChangeLog
Source/WebCore/platform/graphics/filters/FilterEffect.cpp

index 71bc678..253cb79 100644 (file)
@@ -1,3 +1,14 @@
+2012-05-15  Zoltan Herczeg  <zherczeg@webkit.org>
+
+        NEONizing forceValidPreMultipliedPixels
+        https://bugs.webkit.org/show_bug.cgi?id=86468
+
+        Reviewed by Nikolas Zimmermann.
+
+        Allow disabling all NEON intrinsics with a single macro.
+
+        * wtf/Platform.h:
+
 2012-05-14  Andy Estes  <aestes@apple.com>
 
         Add WTF_USE_APPKIT to differentiate platforms that use AppKit.framework from other Darwin platforms
index c6b8269..4491271 100644 (file)
 #define WTF_CPU_ARM_NEON 1
 #endif
 
+#if CPU(ARM_NEON) && (!COMPILER(GCC) || GCC_VERSION_AT_LEAST(4, 7, 0))
+// All NEON intrinsics usage can be disabled by this macro.
+#define HAVE_ARM_NEON_INTRINSICS 1
+#endif
+
 #endif /* ARM */
 
 #if CPU(ARM) || CPU(MIPS) || CPU(SH4) || CPU(SPARC)
index 65ba67a..5d06728 100644 (file)
@@ -1,3 +1,17 @@
+2012-05-15  Zoltan Herczeg  <zherczeg@webkit.org>
+
+        NEONizing forceValidPreMultipliedPixels
+        https://bugs.webkit.org/show_bug.cgi?id=86468
+
+        Reviewed by Nikolas Zimmermann.
+
+        Optimize forceValidPreMultipliedPixels with ARM-NEON intrinsics.
+
+        Existing tests cover this feature.
+
+        * platform/graphics/filters/FilterEffect.cpp:
+        (WebCore::FilterEffect::forceValidPreMultipliedPixels):
+
 2012-05-15  Yury Semikhatsky  <yurys@chromium.org>
 
         Web Inspector: remove unnecessary setTimeout in HeapSnapshotGridNodes.js
index 8dc9185..9701e53 100644 (file)
 #include "TextStream.h"
 #include <wtf/Uint8ClampedArray.h>
 
+#if HAVE(ARM_NEON_INTRINSICS)
+#include <arm_neon.h>
+#endif
+
 namespace WebCore {
 
 FilterEffect::FilterEffect(Filter* filter)
@@ -131,6 +135,26 @@ void FilterEffect::forceValidPreMultipliedPixels()
 
     // We must have four bytes per pixel, and complete pixels
     ASSERT(!(pixelArrayLength % 4));
+
+#if HAVE(ARM_NEON_INTRINSICS)
+    if (pixelArrayLength >= 64) {
+        unsigned char* lastPixel = pixelData + (pixelArrayLength & ~0x3f);
+        do {
+            // Increments pixelData by 64.
+            uint8x16x4_t sixteenPixels = vld4q_u8(pixelData);
+            sixteenPixels.val[0] = vminq_u8(sixteenPixels.val[0], sixteenPixels.val[3]);
+            sixteenPixels.val[1] = vminq_u8(sixteenPixels.val[1], sixteenPixels.val[3]);
+            sixteenPixels.val[2] = vminq_u8(sixteenPixels.val[2], sixteenPixels.val[3]);
+            vst4q_u8(pixelData, sixteenPixels);
+            pixelData += 64;
+        } while (pixelData < lastPixel);
+
+        pixelArrayLength &= 0x3f;
+        if (!pixelArrayLength)
+            return;
+    }
+#endif
+
     int numPixels = pixelArrayLength / 4;
 
     // Iterate over each pixel, checking alpha and adjusting color components if necessary