2 * Copyright (C) 2004, 2005, 2006, 2007 Nikolas Zimmermann <zimmermann@kde.org>
3 * Copyright (C) 2004, 2005 Rob Buis <buis@kde.org>
4 * Copyright (C) 2005 Eric Seidel <eric@webkit.org>
5 * Copyright (C) 2009 Dirk Schulze <krit@webkit.org>
6 * Copyright (C) 2010 Igalia, S.L.
7 * Copyright (C) Research In Motion Limited 2010. All rights reserved.
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Library General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Library General Public License for more details.
19 * You should have received a copy of the GNU Library General Public License
20 * along with this library; see the file COPYING.LIB. If not, write to
21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 * Boston, MA 02110-1301, USA.
26 #include "FEGaussianBlur.h"
28 #include "FEGaussianBlurNEON.h"
30 #include "GraphicsContext.h"
31 #include "TextStream.h"
34 #include <Accelerate/Accelerate.h>
37 #include <runtime/JSCInlines.h>
38 #include <runtime/TypedArrayInlines.h>
39 #include <runtime/Uint8ClampedArray.h>
40 #include <wtf/MathExtras.h>
41 #include <wtf/ParallelJobs.h>
// Returns the constant 3/4 * sqrt(2 * pi). calculateUnscaledKernelSize() multiplies a
// standard deviation by this factor to obtain the box-blur kernel size.
43 static inline float gaussianKernelFactor()
45 return 3 / 4.f * sqrtf(2 * piFloat);
// Upper bound for the kernel size; calculateUnscaledKernelSize() clamps both dimensions to it.
48 static const int gMaxKernelSize = 500;
// Computes, for one box-blur pass, the distances deltaLeft and deltaRight that the blur
// routines use to locate the samples at the left and right ends of the kernel window.
//   blurIteration - selects the pass; standardBoxBlur() calls this with 0, 1 and 2.
//   radius        - kernel size for the axis being blurred.
//   deltaLeft / deltaRight - outputs; in both visible variants deltaRight is assigned
//                   radius - deltaLeft.
// NOTE(review): the case labels and any conditions selecting between the two variants
// below are not visible here - confirm against the full file.
52 inline void kernelPosition(int blurIteration, unsigned& radius, int& deltaLeft, int& deltaRight)
54 // Check http://www.w3.org/TR/SVG/filters.html#feGaussianBlurElement for details.
55 switch (blurIteration) {
// Variant where deltaLeft is one less than radius / 2.
58 deltaLeft = radius / 2 - 1;
59 deltaRight = radius - deltaLeft;
// Variant where deltaLeft is exactly radius / 2.
61 deltaLeft = radius / 2;
62 deltaRight = radius - deltaLeft;
// Constructs the effect for the given filter and stores the requested edge mode.
// NOTE(review): the initialisers consuming x and y are not visible here; presumably they
// set m_stdX / m_stdY - confirm.
80 FEGaussianBlur::FEGaussianBlur(Filter* filter, float x, float y, EdgeModeType edgeMode)
81 : FilterEffect(filter)
84 , m_edgeMode(edgeMode)
// Factory: allocates a new FEGaussianBlur with the given arguments and hands it to adoptRef().
88 PassRefPtr<FEGaussianBlur> FEGaussianBlur::create(Filter* filter, float x, float y, EdgeModeType edgeMode)
90 return adoptRef(new FEGaussianBlur(filter, x, y, edgeMode));
// Getter for the horizontal standard deviation.
// NOTE(review): body not visible here; presumably returns m_stdX - confirm.
93 float FEGaussianBlur::stdDeviationX() const
// Setter for the horizontal standard deviation.
// NOTE(review): body not visible here; presumably assigns x to m_stdX - confirm.
98 void FEGaussianBlur::setStdDeviationX(float x)
// Getter for the vertical standard deviation.
// NOTE(review): body not visible here; presumably returns m_stdY - confirm.
103 float FEGaussianBlur::stdDeviationY() const
// Setter for the vertical standard deviation.
// NOTE(review): body not visible here; presumably assigns y to m_stdY - confirm.
108 void FEGaussianBlur::setStdDeviationY(float y)
// Getter for the edge mode.
// NOTE(review): body not visible here; presumably returns m_edgeMode - confirm.
113 EdgeModeType FEGaussianBlur::edgeMode() const
// Replaces the stored edge mode with the given value.
118 void FEGaussianBlur::setEdgeMode(EdgeModeType edgeMode)
120 m_edgeMode = edgeMode;
123 // This function only operates on Alpha channel.
// One-axis box blur of the alpha byte (index 3 of each RGBA pixel), read from
// srcPixelArray and written to dstPixelArray using a running sum.
//   dx              - divisor applied to the running sum for every output pixel.
//   dxLeft, dxRight - pixel distances used to locate the samples at the left and right
//                     ends of the window.
//   stride          - byte step between consecutive pixels along the blur axis.
//   strideLine      - byte step between consecutive lines.
//   effectWidth     - number of pixels per line; effectHeight - number of lines.
//   maxKernelSize   - number of leading pixels summed before the first output of a line.
// NOTE(review): the statements that subtract/add the left/right samples from the running
// sum are not visible here - confirm against the full file.
124 inline void boxBlurAlphaOnly(const Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* dstPixelArray,
125 unsigned dx, int& dxLeft, int& dxRight, int& stride, int& strideLine, int& effectWidth, int& effectHeight, const int& maxKernelSize)
127 unsigned char* srcData = srcPixelArray->data();
128 unsigned char* dstData = dstPixelArray->data();
129 // Memory alignment is: RGBA, zero-index based.
130 const int channel = 3;
132 for (int y = 0; y < effectHeight; ++y) {
133 int line = y * strideLine;
// Seed the running sum with the alpha of the first maxKernelSize pixels of this line.
137 for (int i = 0; i < maxKernelSize; ++i) {
138 unsigned offset = line + i * stride;
139 unsigned char* srcPtr = srcData + offset;
140 sum += srcPtr[channel];
// Emit sum / dx as the blurred alpha of each pixel, then locate the window's end samples.
144 for (int x = 0; x < effectWidth; ++x) {
145 unsigned pixelByteOffset = line + x * stride + channel;
146 unsigned char* dstPtr = dstData + pixelByteOffset;
147 *dstPtr = static_cast<unsigned char>(sum / dx);
// Sample dxLeft pixels before the current one.
151 unsigned leftOffset = pixelByteOffset - dxLeft * stride;
152 unsigned char* srcPtr = srcData + leftOffset;
// The sample dxRight pixels ahead is only addressed while it still lies inside the line.
156 if (x + dxRight < effectWidth) {
157 unsigned rightOffset = pixelByteOffset + dxRight * stride;
158 unsigned char* srcPtr = srcData + rightOffset;
// One-axis box blur of RGBA pixel data from srcPixelArray into dstPixelArray, keeping a
// running sum per channel and writing sum / dx for every pixel.
//   dx              - divisor applied to the running sums.
//   dxLeft, dxRight - pixel distances used to locate the samples at the left and right
//                     ends of the window.
//   stride          - byte step between consecutive pixels along the blur axis.
//   strideLine      - byte step between consecutive lines.
//   effectWidth     - number of pixels per line; effectHeight - number of lines.
//   alphaImage      - NOTE(review): presumably guards the early delegation to
//                     boxBlurAlphaOnly(); the condition itself is not visible here.
//   edgeMode        - EDGEMODE_NONE takes the first branch; otherwise the 'duplicate' path
//                     substitutes the line's first/last pixel for out-of-range samples.
165 inline void boxBlur(const Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* dstPixelArray,
166 unsigned dx, int dxLeft, int dxRight, int stride, int strideLine, int effectWidth, int effectHeight, bool alphaImage, EdgeModeType edgeMode)
// Never pre-sum more pixels than a line contains.
168 const int maxKernelSize = std::min(dxRight, effectWidth);
// Alpha-only path (the guarding condition is not visible here).
170 return boxBlurAlphaOnly(srcPixelArray, dstPixelArray, dx, dxLeft, dxRight, stride, strideLine,
171 effectWidth, effectHeight, maxKernelSize);
174 unsigned char* srcData = srcPixelArray->data();
175 unsigned char* dstData = dstPixelArray->data();
177 // Concerning the array width/length: it is Element size + Margin + Border. The number of pixels will be
178 // P = width * height * channels.
179 for (int y = 0; y < effectHeight; ++y) {
180 int line = y * strideLine;
181 int sumR = 0, sumG = 0, sumB = 0, sumA = 0;
183 if (edgeMode == EDGEMODE_NONE) {
// Address the first maxKernelSize pixels of the line (presumably to seed the sums; the
// accumulating statements are not visible here).
185 for (int i = 0; i < maxKernelSize; ++i) {
186 unsigned offset = line + i * stride;
187 unsigned char* srcPtr = srcData + offset;
// Emit sum / dx for each channel of every pixel on the line.
195 for (int x = 0; x < effectWidth; ++x) {
196 unsigned pixelByteOffset = line + x * stride;
197 unsigned char* dstPtr = dstData + pixelByteOffset;
199 *dstPtr++ = static_cast<unsigned char>(sumR / dx);
200 *dstPtr++ = static_cast<unsigned char>(sumG / dx);
201 *dstPtr++ = static_cast<unsigned char>(sumB / dx);
202 *dstPtr = static_cast<unsigned char>(sumA / dx);
// Sample dxLeft pixels before the current one.
206 unsigned leftOffset = pixelByteOffset - dxLeft * stride;
207 unsigned char* srcPtr = srcData + leftOffset;
// The sample dxRight pixels ahead is only addressed while it still lies inside the line.
214 if (x + dxRight < effectWidth) {
215 unsigned rightOffset = pixelByteOffset + dxRight * stride;
216 unsigned char* srcPtr = srcData + rightOffset;
225 // FIXME: Add support for 'wrap' here.
226 // Get edge values for edgeMode 'duplicate'.
// edgeValueLeft points at the first pixel of the line, edgeValueRight at the last one.
227 unsigned char* edgeValueLeft = srcData + line;
228 unsigned char* edgeValueRight = srcData + (line + (effectWidth - 1) * stride);
// Seed the sums over window positions [-dxLeft, dxRight). Positions at or beyond
// effectWidth add edgeValueRight; the branch adding edgeValueLeft is presumably the
// i < 0 case (its condition is not visible here).
231 for (int i = dxLeft * -1; i < dxRight; ++i) {
232 // Is this right for negative values of 'i'?
// NOTE(review): offset is unsigned, so line + i * stride wraps around when it is negative;
// srcPtr is still computed for those positions. Whether it is dereferenced depends on
// the branch conditions not visible here - confirm.
233 unsigned offset = line + i * stride;
234 unsigned char* srcPtr = srcData + offset;
237 sumR += edgeValueLeft[0];
238 sumG += edgeValueLeft[1];
239 sumB += edgeValueLeft[2];
240 sumA += edgeValueLeft[3];
241 } else if (i >= effectWidth) {
242 sumR += edgeValueRight[0];
243 sumG += edgeValueRight[1];
244 sumB += edgeValueRight[2];
245 sumA += edgeValueRight[3];
// Emit sum / dx for each channel of every pixel, then slide the window by one pixel.
255 for (int x = 0; x < effectWidth; ++x) {
256 unsigned pixelByteOffset = line + x * stride;
257 unsigned char* dstPtr = dstData + pixelByteOffset;
259 *dstPtr++ = static_cast<unsigned char>(sumR / dx);
260 *dstPtr++ = static_cast<unsigned char>(sumG / dx);
261 *dstPtr++ = static_cast<unsigned char>(sumB / dx);
262 *dstPtr = static_cast<unsigned char>(sumA / dx);
// Remove the duplicated first pixel from the sums (the guarding condition is not visible
// here; presumably it applies while the outgoing sample lies before the start of the line).
266 sumR -= edgeValueLeft[0];
267 sumG -= edgeValueLeft[1];
268 sumB -= edgeValueLeft[2];
269 sumA -= edgeValueLeft[3];
271 unsigned leftOffset = pixelByteOffset - dxLeft * stride;
272 unsigned char* srcPtr = srcData + leftOffset;
// Once the incoming sample would fall at or past the end of the line, add the duplicated
// last pixel instead.
279 if (x + dxRight >= effectWidth) {
280 sumR += edgeValueRight[0];
281 sumG += edgeValueRight[1];
282 sumB += edgeValueRight[2];
283 sumA += edgeValueRight[3];
285 unsigned rightOffset = pixelByteOffset + dxRight * stride;
286 unsigned char* srcPtr = srcData + rightOffset;
// Blurs using three successive vImageBoxConvolve_ARGB8888 passes with the
// kvImageEdgeExtend flag, applying the same kernelSize as kernel height and width.
//   src, dst     - pixel buffers wrapped as vImage buffers of identical geometry.
//   kernelSize   - box kernel size (see the odd-size note below).
//   stride       - used as rowBytes for both buffers.
//   effectWidth / effectHeight - buffer dimensions in pixels.
298 inline void accelerateBoxBlur(const Uint8ClampedArray* src, Uint8ClampedArray* dst, unsigned kernelSize, int stride, int effectWidth, int effectHeight)
300 // We must always use an odd radius.
301 if (kernelSize % 2 != 1)
304 vImage_Buffer effectInBuffer;
305 effectInBuffer.data = src->data();
306 effectInBuffer.width = effectWidth;
307 effectInBuffer.height = effectHeight;
308 effectInBuffer.rowBytes = stride;
310 vImage_Buffer effectOutBuffer;
311 effectOutBuffer.data = dst->data();
312 effectOutBuffer.width = effectWidth;
313 effectOutBuffer.height = effectHeight;
314 effectOutBuffer.rowBytes = stride;
316 // Determine the size of a temporary buffer by calling the function first with a special flag. vImage will return
317 // the size needed, or an error (which are all negative).
// NOTE(review): tmpBufferSize is an unsigned size_t, so the `<= 0` test below can only be
// true for 0; a negative error code would show up as a very large size and reach
// fastMalloc(). Consider holding the result in a signed type before checking it.
318 size_t tmpBufferSize = vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, 0, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend | kvImageGetTempBufferSize);
319 if (tmpBufferSize <= 0)
// Run the three passes, ping-ponging between the two buffers (in->out, out->in, in->out);
// the last pass writes into dst's buffer. The return values of these calls are not checked.
322 void* tmpBuffer = fastMalloc(tmpBufferSize);
323 vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend);
324 vImageBoxConvolve_ARGB8888(&effectOutBuffer, &effectInBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend);
325 vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend);
326 WTF::fastFree(tmpBuffer);
328 // The final result should be stored in src.
// NOTE(review): this memcpy copies src's bytes into dst; the condition guarding it (if any)
// is not visible here - confirm it matches the comment above.
330 ASSERT(src->length() == dst->length());
331 memcpy(dst->data(), src->data(), src->length());
// Software blur: runs three iterations, each consisting of a horizontal pass and a
// vertical pass over the pixel data.
//   kernelSizeX / kernelSizeY - kernel sizes for the horizontal / vertical passes; passed
//                               by reference to kernelPosition().
//   stride       - bytes per row of the image.
//   paintSize    - image dimensions in pixels.
//   isAlphaImage, edgeMode - forwarded to boxBlur().
// NOTE(review): the conditions guarding each pass, the declarations of dxLeft/dxRight/
// dyLeft/dyRight, any src/dst swapping between passes and the #else/#endif lines are not
// visible here - confirm against the full file.
336 inline void standardBoxBlur(Uint8ClampedArray* src, Uint8ClampedArray* dst, unsigned kernelSizeX, unsigned kernelSizeY, int stride, IntSize& paintSize, bool isAlphaImage, EdgeModeType edgeMode)
343 for (int i = 0; i < 3; ++i) {
// Horizontal pass: pixel step is 4 bytes, line step is one row (stride).
345 kernelPosition(i, kernelSizeX, dxLeft, dxRight);
346 #if HAVE(ARM_NEON_INTRINSICS)
// NEON builds: presumably boxBlurNEON() for non-alpha images and boxBlur(..., true, ...)
// for alpha-only images (selecting condition not visible here).
348 boxBlurNEON(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height());
350 boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), true, edgeMode);
352 boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), isAlphaImage, edgeMode);
// Vertical pass: the strides and the width/height arguments are swapped, so each "line"
// handed to the blur routine is an image column.
358 kernelPosition(i, kernelSizeY, dyLeft, dyRight);
359 #if HAVE(ARM_NEON_INTRINSICS)
361 boxBlurNEON(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width());
363 boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), true, edgeMode);
365 boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), isAlphaImage, edgeMode);
371 // The final result should be stored in src.
// NOTE(review): this memcpy copies src's bytes into dst; the condition guarding it (if any)
// is not visible here.
373 ASSERT(src->length() == dst->length());
374 memcpy(dst->data(), src->data(), src->length());
// Blurs one image (or image band) by choosing between the two blur implementations.
//   srcPixelArray / tmpPixelArray - pixel buffer and scratch buffer.
//   kernelSizeX / kernelSizeY     - kernel sizes per axis.
//   paintSize                     - image dimensions in pixels; rows are 4 * width bytes.
// NOTE(review): the preprocessor guard around the accelerateBoxBlur() path and whether it
// returns before reaching standardBoxBlur() are not visible here - confirm.
378 inline void FEGaussianBlur::platformApplyGeneric(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize)
380 int stride = 4 * paintSize.width();
// accelerateBoxBlur() takes a single kernel size, so it is only used when both axes use
// the same size and the edge mode is 'none' or 'duplicate'.
383 if (kernelSizeX == kernelSizeY && (m_edgeMode == EDGEMODE_NONE || m_edgeMode == EDGEMODE_DUPLICATE)) {
384 accelerateBoxBlur(srcPixelArray, tmpPixelArray, kernelSizeX, stride, paintSize.width(), paintSize.height());
389 standardBoxBlur(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, stride, paintSize, isAlphaImage(), m_edgeMode);
// Job entry point handed to WTF::ParallelJobs by platformApply(): blurs the band described
// by `parameters` by calling platformApplyGeneric() on parameters->filter.
392 void FEGaussianBlur::platformApplyWorker(PlatformApplyParameters* parameters)
394 IntSize paintSize(parameters->width, parameters->height);
395 parameters->filter->platformApplyGeneric(parameters->srcPixelArray.get(), parameters->dstPixelArray.get(),
396 parameters->kernelSizeX, parameters->kernelSizeY, paintSize);
// Applies the blur to srcPixelArray. When built without HAVE(ACCELERATE) and more than one
// job is worthwhile, the image is split into horizontal bands that are blurred through
// WTF::ParallelJobs and copied back together; the visible tail of the function calls
// platformApplyGeneric() on the whole image.
// NOTE(review): several lines (declarations of currentY/sourceOffset/size, the if/else
// choosing the per-job buffers, any early return after the parallel path, #endif) are not
// visible here - confirm against the full file.
399 inline void FEGaussianBlur::platformApply(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize)
401 #if !HAVE(ACCELERATE)
// Bytes per image row.
402 int scanline = 4 * paintSize.width();
// Extra rows added to a band on each side shared with a neighbouring band: three times
// half the vertical kernel size.
403 int extraHeight = 3 * kernelSizeY * 0.5f;
// Requested job count: image area divided by s_minimalRectDimension plus the area of the
// extra rows.
404 int optimalThreadNumber = (paintSize.width() * paintSize.height()) / (s_minimalRectDimension + extraHeight * paintSize.width());
406 if (optimalThreadNumber > 1) {
407 WTF::ParallelJobs<PlatformApplyParameters> parallelJobs(&platformApplyWorker, optimalThreadNumber);
409 int jobs = parallelJobs.numberOfJobs();
411 // Split the job into "blockHeight"-sized jobs but there are a few jobs that need to be slightly larger since
412 // blockHeight * jobs < total size. These extras are handled by the remainder "jobsWithExtra".
413 const int blockHeight = paintSize.height() / jobs;
414 const int jobsWithExtra = paintSize.height() % jobs;
417 for (int job = 0; job < jobs; job++) {
418 PlatformApplyParameters& params = parallelJobs.parameter(job);
419 params.filter = this;
// Every band except the first starts extraHeight rows early; every band except the last
// ends extraHeight rows late.
421 int startY = !job ? 0 : currentY - extraHeight;
422 currentY += job < jobsWithExtra ? blockHeight + 1 : blockHeight;
423 int endY = job == jobs - 1 ? currentY : currentY + extraHeight;
425 int blockSize = (endY - startY) * scanline;
// Either reuse the caller's arrays or allocate per-job arrays and copy the band's rows in
// (the selecting condition is not visible here; presumably the first job reuses them).
427 params.srcPixelArray = srcPixelArray;
428 params.dstPixelArray = tmpPixelArray;
430 params.srcPixelArray = Uint8ClampedArray::createUninitialized(blockSize);
431 params.dstPixelArray = Uint8ClampedArray::createUninitialized(blockSize);
432 memcpy(params.srcPixelArray->data(), srcPixelArray->data() + startY * scanline, blockSize);
435 params.width = paintSize.width();
436 params.height = endY - startY;
437 params.kernelSizeX = kernelSizeX;
438 params.kernelSizeY = kernelSizeY;
441 parallelJobs.execute();
443 // Copy together the parts of the image.
// The loop starts at job 1; job 0 is not copied back here.
445 for (int job = 1; job < jobs; job++) {
446 PlatformApplyParameters& params = parallelJobs.parameter(job);
448 int destinationOffset;
450 int adjustedBlockHeight = job < jobsWithExtra ? blockHeight + 1 : blockHeight;
452 currentY += adjustedBlockHeight;
// Skip the extraHeight leading rows of the job's buffer and copy adjustedBlockHeight rows
// back into srcPixelArray.
453 sourceOffset = extraHeight * scanline;
454 destinationOffset = currentY * scanline;
455 size = adjustedBlockHeight * scanline;
457 memcpy(srcPixelArray->data() + destinationOffset, params.srcPixelArray->data() + sourceOffset, size);
461 // Fallback to single threaded mode.
465 // The selection here eventually should happen dynamically on some platforms.
466 platformApplyGeneric(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize);
// Converts a standard deviation (x, y) into a box-blur kernel size. For each non-zero
// component the size is floor(stdDeviation * gaussianKernelFactor() + 0.5), raised to at
// least 2 and then clamped to gMaxKernelSize.
// Both components of stdDeviation must be non-negative (asserted).
// NOTE(review): the declaration of kernelSize and the return statement are not visible
// here; presumably a zero component leaves the corresponding dimension at 0 - confirm.
469 IntSize FEGaussianBlur::calculateUnscaledKernelSize(const FloatPoint& stdDeviation)
471 ASSERT(stdDeviation.x() >= 0 && stdDeviation.y() >= 0);
474 // Limit the kernel size to 500. A bigger radius won't make a big difference for the result image but
475 // inflates the absolute paint rect too much. This is compatible with Firefox' behavior.
476 if (stdDeviation.x()) {
477 int size = std::max<unsigned>(2, static_cast<unsigned>(floorf(stdDeviation.x() * gaussianKernelFactor() + 0.5f)));
478 kernelSize.setWidth(std::min(size, gMaxKernelSize));
481 if (stdDeviation.y()) {
482 int size = std::max<unsigned>(2, static_cast<unsigned>(floorf(stdDeviation.y() * gaussianKernelFactor() + 0.5f)));
483 kernelSize.setHeight(std::min(size, gMaxKernelSize));
// Kernel size for a standard deviation after applying the filter's horizontal and
// vertical scale to its x and y components respectively.
489 IntSize FEGaussianBlur::calculateKernelSize(const Filter& filter, const FloatPoint& stdDeviation)
491 FloatPoint stdFilterScaled(filter.applyHorizontalScale(stdDeviation.x()), filter.applyVerticalScale(stdDeviation.y()));
492 return calculateUnscaledKernelSize(stdFilterScaled);
// Computes and stores this effect's absolute paint rect, starting from the input effect's
// rect and, for edge mode 'none', growing it by the blur extent.
// NOTE(review): the early return after the non-'none' branch and the condition choosing
// between intersect() and unite() with maxEffectRect() are not visible here - confirm.
495 void FEGaussianBlur::determineAbsolutePaintRect()
497 IntSize kernelSize = calculateKernelSize(filter(), FloatPoint(m_stdX, m_stdY));
499 FloatRect absolutePaintRect = inputEffect(0)->absolutePaintRect();
500 // Edge modes other than 'none' do not inflate the affected paint rect.
501 if (m_edgeMode != EDGEMODE_NONE) {
502 setAbsolutePaintRect(enclosingIntRect(absolutePaintRect));
506 // We take the half kernel size and multiply it with three, because we run box blur three times.
507 absolutePaintRect.inflateX(3 * kernelSize.width() * 0.5f);
508 absolutePaintRect.inflateY(3 * kernelSize.height() * 0.5f);
511 absolutePaintRect.intersect(maxEffectRect());
513 absolutePaintRect.unite(maxEffectRect());
515 setAbsolutePaintRect(enclosingIntRect(absolutePaintRect));
// Software implementation of the effect: copies the input effect's premultiplied pixels
// into this effect's result buffer and blurs them via platformApply().
// NOTE(review): the null check on srcPixelArray and the statement under the zero-deviation
// test are not visible here; presumably both return early - confirm.
518 void FEGaussianBlur::platformApplySoftware()
520 FilterEffect* in = inputEffect(0);
522 Uint8ClampedArray* srcPixelArray = createPremultipliedImageResult();
// Propagate the input's alpha-only flag to this effect.
526 setIsAlphaImage(in->isAlphaImage());
528 IntRect effectDrawingRect = requestedRegionOfInputImageData(in->absolutePaintRect());
529 in->copyPremultipliedImage(srcPixelArray, effectDrawingRect);
// Both standard deviations are zero.
531 if (!m_stdX && !m_stdY)
// Kernel size and paint size are both scaled by the filter scale.
534 IntSize kernelSize = calculateKernelSize(filter(), FloatPoint(m_stdX, m_stdY));
535 kernelSize.scale(filter().filterScale());
537 IntSize paintSize = absolutePaintRect().size();
538 paintSize.scale(filter().filterScale());
// Scratch buffer of width * height * 4 bytes, handed to platformApply() as tmpPixelArray.
539 RefPtr<Uint8ClampedArray> tmpImageData = Uint8ClampedArray::createUninitialized(paintSize.width() * paintSize.height() * 4);
540 Uint8ClampedArray* tmpPixelArray = tmpImageData.get();
542 platformApply(srcPixelArray, tmpPixelArray, kernelSize.width(), kernelSize.height(), paintSize);
// NOTE(review): body not visible here; what this writes (if anything) cannot be determined
// from this view.
545 void FEGaussianBlur::dump()
// Writes a textual description of this effect to ts: the indentation, "[feGaussianBlur",
// the base FilterEffect representation, the stdDeviation pair, and then the input
// effect's representation one indent level deeper.
// NOTE(review): the return statement is not visible here; presumably returns ts.
549 TextStream& FEGaussianBlur::externalRepresentation(TextStream& ts, int indent) const
551 writeIndent(ts, indent);
552 ts << "[feGaussianBlur";
553 FilterEffect::externalRepresentation(ts);
554 ts << " stdDeviation=\"" << m_stdX << ", " << m_stdY << "\"]\n";
555 inputEffect(0)->externalRepresentation(ts, indent + 1);
559 } // namespace WebCore