97418f2c10d1b69df4db5328813ebbdd54aa7ce7
[WebKit-https.git] / Source / WebCore / platform / graphics / cpu / arm / filters / FEBlendNEON.h
1 /*
2  * Copyright (C) 2012 University of Szeged
3  * Copyright (C) 2012 Gabor Rapcsanyi
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #ifndef FEBlendNEON_h
28 #define FEBlendNEON_h
29
30 #include <wtf/Platform.h>
31
32 #if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
33
34 #include "FEBlend.h"
35 #include <arm_neon.h>
36
37 namespace WebCore {
38
39 class FEBlendUtilitiesNEON {
40 public:
41     static inline uint16x8_t div255(uint16x8_t num, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
42     {
43         uint16x8_t quotient = vshrq_n_u16(num, 8);
44         uint16x8_t remainder = vaddq_u16(vsubq_u16(num, vmulq_u16(sixteenConst255, quotient)), sixteenConstOne);
45         return vaddq_u16(quotient, vshrq_n_u16(remainder, 8));
46     }
47
48     static inline uint16x8_t normal(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t,
49                                     uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
50     {
51         uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
52         uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
53         uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
54         return vaddq_u16(tmp3, pixelA);
55     }
56
57     static inline uint16x8_t multiply(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB,
58                                       uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
59     {
60         uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
61         uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
62         uint16x8_t tmp3 = vaddq_u16(vsubq_u16(sixteenConst255, alphaB), pixelB);
63         uint16x8_t tmp4 = vmulq_u16(tmp3, pixelA);
64         uint16x8_t tmp5 = vaddq_u16(tmp2, tmp4);
65         return div255(tmp5, sixteenConst255, sixteenConstOne);
66     }
67
68     static inline uint16x8_t screen(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t, uint16x8_t,
69                                     uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
70     {
71         uint16x8_t tmp1 = vaddq_u16(pixelA, pixelB);
72         uint16x8_t tmp2 = vmulq_u16(pixelA, pixelB);
73         uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
74         return vsubq_u16(tmp1, tmp3);
75     }
76
77     static inline uint16x8_t darken(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB,
78                                     uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
79     {
80         uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
81         uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
82         uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
83         uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA);
84
85         uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB);
86         uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA);
87         uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne);
88         uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB);
89
90         return vminq_u16(tmp4, tmp8);
91     }
92
93     static inline uint16x8_t lighten(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB,
94                                      uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
95     {
96         uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
97         uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
98         uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
99         uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA);
100
101         uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB);
102         uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA);
103         uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne);
104         uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB);
105
106         return vmaxq_u16(tmp4, tmp8);
107     }
108 };
109
110 void FEBlend::platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray,
111                                 unsigned colorArrayLength)
112 {
113     uint8_t* sourcePixelA = reinterpret_cast<uint8_t*>(srcPixelArrayA);
114     uint8_t* sourcePixelB = reinterpret_cast<uint8_t*>(srcPixelArrayB);
115     uint8_t* destinationPixel = reinterpret_cast<uint8_t*>(dstPixelArray);
116
117     uint16x8_t sixteenConst255 = vdupq_n_u16(255);
118     uint16x8_t sixteenConstOne = vdupq_n_u16(1);
119
120     unsigned colorOffset = 0;
121     while (colorOffset < colorArrayLength) {
122         unsigned char alphaA1 = srcPixelArrayA[colorOffset + 3];
123         unsigned char alphaB1 = srcPixelArrayB[colorOffset + 3];
124         unsigned char alphaA2 = srcPixelArrayA[colorOffset + 7];
125         unsigned char alphaB2 = srcPixelArrayB[colorOffset + 7];
126
127         uint16x8_t doubblePixelA = vmovl_u8(vld1_u8(sourcePixelA + colorOffset));
128         uint16x8_t doubblePixelB = vmovl_u8(vld1_u8(sourcePixelB + colorOffset));
129         uint16x8_t alphaA = vcombine_u16(vdup_n_u16(alphaA1), vdup_n_u16(alphaA2));
130         uint16x8_t alphaB = vcombine_u16(vdup_n_u16(alphaB1), vdup_n_u16(alphaB2));
131
132         uint16x8_t result;
133         switch (m_mode) {
134         case FEBLEND_MODE_NORMAL:
135             result = FEBlendUtilitiesNEON::normal(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
136             break;
137         case FEBLEND_MODE_MULTIPLY:
138             result = FEBlendUtilitiesNEON::multiply(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
139             break;
140         case FEBLEND_MODE_SCREEN:
141             result = FEBlendUtilitiesNEON::screen(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
142             break;
143         case FEBLEND_MODE_DARKEN:
144             result = FEBlendUtilitiesNEON::darken(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
145             break;
146         case FEBLEND_MODE_LIGHTEN:
147             result = FEBlendUtilitiesNEON::lighten(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
148             break;
149         case FEBLEND_MODE_UNKNOWN:
150         default:
151             result = vdupq_n_u16(0);
152             break;
153         }
154
155         vst1_u8(destinationPixel + colorOffset, vmovn_u16(result));
156
157         unsigned char alphaR1 = 255 - ((255 - alphaA1) * (255 - alphaB1)) / 255;
158         unsigned char alphaR2 = 255 - ((255 - alphaA2) * (255 - alphaB2)) / 255;
159
160         dstPixelArray[colorOffset + 3] = alphaR1;
161         dstPixelArray[colorOffset + 7] = alphaR2;
162
163         colorOffset += 8;
164         if (colorOffset > colorArrayLength) {
165             ASSERT(colorOffset - 4 == colorArrayLength);
166             colorOffset = colorArrayLength - 8;
167         }
168     }
169 }
170
171 } // namespace WebCore
172
173 #endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
174
175 #endif // FEBlendNEON_h