Some FilterEffect cleanup and logging
[WebKit-https.git] / Source / WebCore / platform / graphics / cpu / arm / filters / FEBlendNEON.h
1 /*
2  * Copyright (C) 2012 University of Szeged
3  * Copyright (C) 2012 Gabor Rapcsanyi
4  * Copyright (C) 2014 Adobe Systems Incorporated. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #ifndef FEBlendNEON_h
29 #define FEBlendNEON_h
30
31 #if HAVE(ARM_NEON_INTRINSICS)
32
33 #include "FEBlend.h"
34 #include <arm_neon.h>
35
36 namespace WebCore {
37
38 class FEBlendUtilitiesNEON {
39 public:
40     static inline uint16x8_t div255(uint16x8_t num, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
41     {
42         uint16x8_t quotient = vshrq_n_u16(num, 8);
43         uint16x8_t remainder = vaddq_u16(vsubq_u16(num, vmulq_u16(sixteenConst255, quotient)), sixteenConstOne);
44         return vaddq_u16(quotient, vshrq_n_u16(remainder, 8));
45     }
46
47     static inline uint16x8_t normal(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t,
48                                     uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
49     {
50         uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
51         uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
52         uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
53         return vaddq_u16(tmp3, pixelA);
54     }
55
56     static inline uint16x8_t multiply(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB,
57                                       uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
58     {
59         uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
60         uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
61         uint16x8_t tmp3 = vaddq_u16(vsubq_u16(sixteenConst255, alphaB), pixelB);
62         uint16x8_t tmp4 = vmulq_u16(tmp3, pixelA);
63         uint16x8_t tmp5 = vaddq_u16(tmp2, tmp4);
64         return div255(tmp5, sixteenConst255, sixteenConstOne);
65     }
66
67     static inline uint16x8_t screen(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t, uint16x8_t,
68                                     uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
69     {
70         uint16x8_t tmp1 = vaddq_u16(pixelA, pixelB);
71         uint16x8_t tmp2 = vmulq_u16(pixelA, pixelB);
72         uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
73         return vsubq_u16(tmp1, tmp3);
74     }
75
76     static inline uint16x8_t darken(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB,
77                                     uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
78     {
79         uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
80         uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
81         uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
82         uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA);
83
84         uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB);
85         uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA);
86         uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne);
87         uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB);
88
89         return vminq_u16(tmp4, tmp8);
90     }
91
92     static inline uint16x8_t lighten(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB,
93                                      uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
94     {
95         uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
96         uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
97         uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
98         uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA);
99
100         uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB);
101         uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA);
102         uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne);
103         uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB);
104
105         return vmaxq_u16(tmp4, tmp8);
106     }
107 };
108
109 void FEBlend::platformApplySoftware()
110 {
111     FilterEffect* in = inputEffect(0);
112     FilterEffect* in2 = inputEffect(1);
113
114     Uint8ClampedArray* dstPixelArray = createPremultipliedImageResult();
115     if (!dstPixelArray)
116         return;
117
118     IntRect effectADrawingRect = requestedRegionOfInputImageData(in->absolutePaintRect());
119     RefPtr<Uint8ClampedArray> srcPixelArrayA = in->premultipliedResult(effectADrawingRect);
120
121     IntRect effectBDrawingRect = requestedRegionOfInputImageData(in2->absolutePaintRect());
122     RefPtr<Uint8ClampedArray> srcPixelArrayB = in2->premultipliedResult(effectBDrawingRect);
123
124     unsigned pixelArrayLength = srcPixelArrayA->length();
125     ASSERT(pixelArrayLength == srcPixelArrayB->length());
126
127     if (pixelArrayLength >= 8) {
128         platformApplyNEON(srcPixelArrayA->data(), srcPixelArrayB->data(), dstPixelArray->data(), pixelArrayLength);
129         return;
130     }
131     // If there is just one pixel we expand it to two.
132     ASSERT(pixelArrayLength > 0);
133     uint32_t sourceA[2] = {0, 0};
134     uint32_t sourceBAndDest[2] = {0, 0};
135
136     sourceA[0] = reinterpret_cast<uint32_t*>(srcPixelArrayA->data())[0];
137     sourceBAndDest[0] = reinterpret_cast<uint32_t*>(srcPixelArrayB->data())[0];
138     platformApplyNEON(reinterpret_cast<uint8_t*>(sourceA), reinterpret_cast<uint8_t*>(sourceBAndDest), reinterpret_cast<uint8_t*>(sourceBAndDest), 8);
139     reinterpret_cast<uint32_t*>(dstPixelArray->data())[0] = sourceBAndDest[0];
140 }
141
142 void FEBlend::platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray,
143                                 unsigned colorArrayLength)
144 {
145     uint8_t* sourcePixelA = reinterpret_cast<uint8_t*>(srcPixelArrayA);
146     uint8_t* sourcePixelB = reinterpret_cast<uint8_t*>(srcPixelArrayB);
147     uint8_t* destinationPixel = reinterpret_cast<uint8_t*>(dstPixelArray);
148
149     uint16x8_t sixteenConst255 = vdupq_n_u16(255);
150     uint16x8_t sixteenConstOne = vdupq_n_u16(1);
151
152     unsigned colorOffset = 0;
153     while (colorOffset < colorArrayLength) {
154         unsigned char alphaA1 = srcPixelArrayA[colorOffset + 3];
155         unsigned char alphaB1 = srcPixelArrayB[colorOffset + 3];
156         unsigned char alphaA2 = srcPixelArrayA[colorOffset + 7];
157         unsigned char alphaB2 = srcPixelArrayB[colorOffset + 7];
158
159         uint16x8_t doubblePixelA = vmovl_u8(vld1_u8(sourcePixelA + colorOffset));
160         uint16x8_t doubblePixelB = vmovl_u8(vld1_u8(sourcePixelB + colorOffset));
161         uint16x8_t alphaA = vcombine_u16(vdup_n_u16(alphaA1), vdup_n_u16(alphaA2));
162         uint16x8_t alphaB = vcombine_u16(vdup_n_u16(alphaB1), vdup_n_u16(alphaB2));
163
164         uint16x8_t result;
165         switch (m_mode) {
166         case BlendModeNormal:
167             result = FEBlendUtilitiesNEON::normal(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
168             break;
169         case BlendModeMultiply:
170             result = FEBlendUtilitiesNEON::multiply(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
171             break;
172         case BlendModeScreen:
173             result = FEBlendUtilitiesNEON::screen(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
174             break;
175         case BlendModeDarken:
176             result = FEBlendUtilitiesNEON::darken(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
177             break;
178         case BlendModeLighten:
179             result = FEBlendUtilitiesNEON::lighten(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
180             break;
181         default:
182             result = vdupq_n_u16(0);
183             break;
184         }
185
186         vst1_u8(destinationPixel + colorOffset, vmovn_u16(result));
187
188         unsigned char alphaR1 = 255 - ((255 - alphaA1) * (255 - alphaB1)) / 255;
189         unsigned char alphaR2 = 255 - ((255 - alphaA2) * (255 - alphaB2)) / 255;
190
191         dstPixelArray[colorOffset + 3] = alphaR1;
192         dstPixelArray[colorOffset + 7] = alphaR2;
193
194         colorOffset += 8;
195         if (colorOffset > colorArrayLength) {
196             ASSERT(colorOffset - 4 == colorArrayLength);
197             colorOffset = colorArrayLength - 8;
198         }
199     }
200 }
201
202 } // namespace WebCore
203
204 #endif // HAVE(ARM_NEON_INTRINSICS)
205
206 #endif // FEBlendNEON_h