Optimize the texture packing for texImage2D() and texSubImage2D() in WebGL
authorcommit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 23 Jan 2013 02:58:32 +0000 (02:58 +0000)
committercommit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 23 Jan 2013 02:58:32 +0000 (02:58 +0000)
https://bugs.webkit.org/show_bug.cgi?id=105821

Patch by Jun Jiang <jun.a.jiang@intel.com> on 2013-01-22
Reviewed by Kenneth Russell.

The original texture packing code uses function pointers to denote each unpack and pack operations for different combination of Source formats,
Destination formats and Alpha Operations. It could be made cleaner and simpler. In this patch, template functions are used and try best to avoid
generating useless code for unreachable path to avoid code bloat. Moreover, the flipY operation is moved into the pack operation instead of
operating in a standalone method to improve performance.

Already covered by current test.

* html/canvas/WebGLRenderingContext.cpp:
(WebCore):
(WebCore::WebGLRenderingContext::texImage2DImpl):
(WebCore::WebGLRenderingContext::texSubImage2DImpl):
* platform/graphics/GraphicsContext3D.cpp:
(WebCore::GraphicsContext3D::packImageData):
(WebCore::GraphicsContext3D::extractImageData):
(WebCore::GraphicsContext3D::extractTextureData):
(WebCore):
(WebCore::GraphicsContext3D::packPixels):
* platform/graphics/GraphicsContext3D.h:
(WebCore::GraphicsContext3D::srcFormatComeFromDOMElementOrImageData):
(GraphicsContext3D):
(WebCore::GraphicsContext3D::ImageExtractor::imageSourceFormat):
(ImageExtractor):
* platform/graphics/cairo/GraphicsContext3DCairo.cpp:
(WebCore::GraphicsContext3D::ImageExtractor::extractImage):
* platform/graphics/cg/GraphicsContext3DCG.cpp:
(WebCore::getSourceDataFormat):
(WebCore::GraphicsContext3D::ImageExtractor::extractImage):
* platform/graphics/efl/GraphicsContext3DEfl.cpp:
(WebCore::GraphicsContext3D::ImageExtractor::extractImage):
* platform/graphics/qt/GraphicsContext3DQt.cpp:
(WebCore::GraphicsContext3D::ImageExtractor::extractImage):
* platform/graphics/skia/GraphicsContext3DSkia.cpp:
(WebCore::GraphicsContext3D::ImageExtractor::extractImage):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@140497 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/html/canvas/WebGLRenderingContext.cpp
Source/WebCore/platform/graphics/GraphicsContext3D.cpp
Source/WebCore/platform/graphics/GraphicsContext3D.h
Source/WebCore/platform/graphics/cairo/GraphicsContext3DCairo.cpp
Source/WebCore/platform/graphics/cg/GraphicsContext3DCG.cpp
Source/WebCore/platform/graphics/efl/GraphicsContext3DEfl.cpp
Source/WebCore/platform/graphics/qt/GraphicsContext3DQt.cpp
Source/WebCore/platform/graphics/skia/GraphicsContext3DSkia.cpp

index d01210c..9062543 100644 (file)
@@ -1,3 +1,44 @@
+2013-01-22  Jun Jiang  <jun.a.jiang@intel.com>
+
+        Optimize the texture packing for texImage2D() and texSubImage2D() in WebGL
+        https://bugs.webkit.org/show_bug.cgi?id=105821
+
+        Reviewed by Kenneth Russell.
+
+        The original texture packing code uses function pointers to denote each unpack and pack operations for different combination of Source formats,
+        Destination formats and Alpha Operations. It could be made cleaner and simpler. In this patch, template functions are used and try best to avoid
+        generating useless code for unreachable path to avoid code bloat. Moreover, the flipY operation is moved into the pack operation instead of
+        operating in a standalone method to improve performance.
+
+        Already covered by current test.
+
+        * html/canvas/WebGLRenderingContext.cpp:
+        (WebCore):
+        (WebCore::WebGLRenderingContext::texImage2DImpl):
+        (WebCore::WebGLRenderingContext::texSubImage2DImpl):
+        * platform/graphics/GraphicsContext3D.cpp:
+        (WebCore::GraphicsContext3D::packImageData):
+        (WebCore::GraphicsContext3D::extractImageData):
+        (WebCore::GraphicsContext3D::extractTextureData):
+        (WebCore):
+        (WebCore::GraphicsContext3D::packPixels):
+        * platform/graphics/GraphicsContext3D.h:
+        (WebCore::GraphicsContext3D::srcFormatComeFromDOMElementOrImageData):
+        (GraphicsContext3D):
+        (WebCore::GraphicsContext3D::ImageExtractor::imageSourceFormat):
+        (ImageExtractor):
+        * platform/graphics/cairo/GraphicsContext3DCairo.cpp:
+        (WebCore::GraphicsContext3D::ImageExtractor::extractImage):
+        * platform/graphics/cg/GraphicsContext3DCG.cpp:
+        (WebCore::getSourceDataFormat):
+        (WebCore::GraphicsContext3D::ImageExtractor::extractImage):
+        * platform/graphics/efl/GraphicsContext3DEfl.cpp:
+        (WebCore::GraphicsContext3D::ImageExtractor::extractImage):
+        * platform/graphics/qt/GraphicsContext3DQt.cpp:
+        (WebCore::GraphicsContext3D::ImageExtractor::extractImage):
+        * platform/graphics/skia/GraphicsContext3DSkia.cpp:
+        (WebCore::GraphicsContext3D::ImageExtractor::extractImage):
+
 2013-01-21  Kentaro Hara  <haraken@chromium.org>
 
         Implement UIEvent constructor
index 1759e1c..e743ee2 100644 (file)
@@ -3645,12 +3645,12 @@ void WebGLRenderingContext::texImage2DImpl(GC3Denum target, GC3Dint level, GC3De
         synthesizeGLError(GraphicsContext3D::INVALID_VALUE, "texImage2D", "bad image data");
         return;
     }
-    GraphicsContext3D::SourceDataFormat sourceDataFormat = imageExtractor.imageSourceFormat();
+    GraphicsContext3D::DataFormat sourceDataFormat = imageExtractor.imageSourceFormat();
     GraphicsContext3D::AlphaOp alphaOp = imageExtractor.imageAlphaOp();
     const void* imagePixelData = imageExtractor.imagePixelData();
 
     bool needConversion = true;
-    if (type == GraphicsContext3D::UNSIGNED_BYTE && sourceDataFormat == GraphicsContext3D::SourceFormatRGBA8 && format == GraphicsContext3D::RGBA && alphaOp == GraphicsContext3D::AlphaDoNothing && !flipY)
+    if (type == GraphicsContext3D::UNSIGNED_BYTE && sourceDataFormat == GraphicsContext3D::DataFormatRGBA8 && format == GraphicsContext3D::RGBA && alphaOp == GraphicsContext3D::AlphaDoNothing && !flipY)
         needConversion = false;
     else {
         if (!m_context->packImageData(image, imagePixelData, format, type, flipY, alphaOp, sourceDataFormat, imageExtractor.imageWidth(), imageExtractor.imageHeight(), imageExtractor.imageSourceUnpackAlignment(), data)) {
@@ -3889,12 +3889,12 @@ void WebGLRenderingContext::texSubImage2DImpl(GC3Denum target, GC3Dint level, GC
         synthesizeGLError(GraphicsContext3D::INVALID_VALUE, "texSubImage2D", "bad image");
         return;
     }
-    GraphicsContext3D::SourceDataFormat sourceDataFormat = imageExtractor.imageSourceFormat();
+    GraphicsContext3D::DataFormat sourceDataFormat = imageExtractor.imageSourceFormat();
     GraphicsContext3D::AlphaOp alphaOp = imageExtractor.imageAlphaOp();
     const void* imagePixelData = imageExtractor.imagePixelData();
 
     bool needConversion = true;
-    if (type == GraphicsContext3D::UNSIGNED_BYTE && sourceDataFormat == GraphicsContext3D::SourceFormatRGBA8 && format == GraphicsContext3D::RGBA && alphaOp == GraphicsContext3D::AlphaDoNothing && !flipY)
+    if (type == GraphicsContext3D::UNSIGNED_BYTE && sourceDataFormat == GraphicsContext3D::DataFormatRGBA8 && format == GraphicsContext3D::RGBA && alphaOp == GraphicsContext3D::AlphaDoNothing && !flipY)
         needConversion = false;
     else {
         if (!m_context->packImageData(image, imagePixelData, format, type, flipY, alphaOp, sourceDataFormat, imageExtractor.imageWidth(), imageExtractor.imageHeight(), imageExtractor.imageSourceUnpackAlignment(), data)) {
index 7669017..2828eae 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2010 Apple Inc. All rights reserved.
  * Copyright (C) 2010 Google Inc. All rights reserved.
+ * Copyright (C) 2010 Mozilla Corporation. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -56,6 +57,67 @@ uint8_t convertColor16BigTo8(uint16_t value)
     return static_cast<uint8_t>(value & 0x00FF);
 }
 
+GraphicsContext3D::DataFormat getDataFormat(GC3Denum destinationFormat, GC3Denum destinationType)
+{
+    GraphicsContext3D::DataFormat dstFormat = GraphicsContext3D::DataFormatRGBA8;
+    switch (destinationType) {
+    case GraphicsContext3D::UNSIGNED_BYTE:
+        switch (destinationFormat) {
+        case GraphicsContext3D::RGB:
+            dstFormat = GraphicsContext3D::DataFormatRGB8;
+            break;
+        case GraphicsContext3D::RGBA:
+            dstFormat = GraphicsContext3D::DataFormatRGBA8;
+            break;
+        case GraphicsContext3D::ALPHA:
+            dstFormat = GraphicsContext3D::DataFormatA8;
+            break;
+        case GraphicsContext3D::LUMINANCE:
+            dstFormat = GraphicsContext3D::DataFormatR8;
+            break;
+        case GraphicsContext3D::LUMINANCE_ALPHA:
+            dstFormat = GraphicsContext3D::DataFormatRA8;
+            break;
+        default:
+            ASSERT_NOT_REACHED();
+        }
+        break;
+    case GraphicsContext3D::UNSIGNED_SHORT_4_4_4_4:
+        dstFormat = GraphicsContext3D::DataFormatRGBA4444;
+        break;
+    case GraphicsContext3D::UNSIGNED_SHORT_5_5_5_1:
+        dstFormat = GraphicsContext3D::DataFormatRGBA5551;
+        break;
+    case GraphicsContext3D::UNSIGNED_SHORT_5_6_5:
+        dstFormat = GraphicsContext3D::DataFormatRGB565;
+        break;
+    case GraphicsContext3D::FLOAT: // OES_texture_float
+        switch (destinationFormat) {
+        case GraphicsContext3D::RGB:
+            dstFormat = GraphicsContext3D::DataFormatRGB32F;
+            break;
+        case GraphicsContext3D::RGBA:
+            dstFormat = GraphicsContext3D::DataFormatRGBA32F;
+            break;
+        case GraphicsContext3D::ALPHA:
+            dstFormat = GraphicsContext3D::DataFormatA32F;
+            break;
+        case GraphicsContext3D::LUMINANCE:
+            dstFormat = GraphicsContext3D::DataFormatR32F;
+            break;
+        case GraphicsContext3D::LUMINANCE_ALPHA:
+            dstFormat = GraphicsContext3D::DataFormatRA32F;
+            break;
+        default:
+            ASSERT_NOT_REACHED();
+        }
+        break;
+    default:
+        ASSERT_NOT_REACHED();
+    }
+    return dstFormat;
+}
+
 } // anonymous namespace
 
 bool GraphicsContext3D::texImage2DResourceSafe(GC3Denum target, GC3Dint level, GC3Denum internalformat, GC3Dsizei width, GC3Dsizei height, GC3Dint border, GC3Denum format, GC3Denum type, GC3Dint unpackAlignment)
@@ -182,7 +244,7 @@ bool GraphicsContext3D::packImageData(
     GC3Denum type,
     bool flipY,
     AlphaOp alphaOp,
-    SourceDataFormat sourceFormat, 
+    DataFormat sourceFormat,
     unsigned width,
     unsigned height,
     unsigned sourceUnpackAlignment,
@@ -197,29 +259,8 @@ bool GraphicsContext3D::packImageData(
         return false;
     data.resize(packedSize);
 
-    if (!packPixels(reinterpret_cast<const uint8_t*>(pixels),
-        sourceFormat,
-        width,
-        height,
-        sourceUnpackAlignment,
-        format,
-        type,
-        alphaOp,
-        data.data()))
+    if (!packPixels(reinterpret_cast<const uint8_t*>(pixels), sourceFormat, width, height, sourceUnpackAlignment, format, type, alphaOp, data.data(), flipY))
         return false;
-    if (flipY) {
-        unsigned componentsPerPixel, bytesPerComponent;
-        if (!computeFormatAndTypeParameters(format, type,
-            &componentsPerPixel,
-            &bytesPerComponent))
-            return false;
-        // The image data is tightly packed, and we upload it as such.
-        unsigned unpackAlignment = 1;
-        flipVertically(data.data(), width, height,
-            componentsPerPixel * bytesPerComponent,
-            unpackAlignment);
-    }
-
     if (ImageObserver *observer = image->imageObserver())
         observer->didDraw(image);
     return true;
@@ -243,28 +284,9 @@ bool GraphicsContext3D::extractImageData(ImageData* imageData,
         return false;
     data.resize(packedSize);
 
-    if (!packPixels(imageData->data()->data(),
-                    SourceFormatRGBA8,
-                    width,
-                    height,
-                    0,
-                    format,
-                    type,
-                    premultiplyAlpha ? AlphaDoPremultiply : AlphaDoNothing,
-                    data.data()))
+    if (!packPixels(imageData->data()->data(), DataFormatRGBA8, width, height, 0, format, type, premultiplyAlpha ? AlphaDoPremultiply : AlphaDoNothing, data.data(), flipY))
         return false;
-    if (flipY) {
-        unsigned int componentsPerPixel, bytesPerComponent;
-        if (!computeFormatAndTypeParameters(format, type,
-                                            &componentsPerPixel,
-                                            &bytesPerComponent))
-            return false;
-        // The image data is tightly packed, and we upload it as such.
-        unsigned int unpackAlignment = 1;
-        flipVertically(data.data(), width, height,
-                       componentsPerPixel * bytesPerComponent,
-                       unpackAlignment);
-    }
+
     return true;
 }
 
@@ -276,62 +298,7 @@ bool GraphicsContext3D::extractTextureData(unsigned int width, unsigned int heig
                                            Vector<uint8_t>& data)
 {
     // Assumes format, type, etc. have already been validated.
-    SourceDataFormat sourceDataFormat = SourceFormatRGBA8;
-    switch (type) {
-    case UNSIGNED_BYTE:
-        switch (format) {
-        case RGBA:
-            sourceDataFormat = SourceFormatRGBA8;
-            break;
-        case RGB:
-            sourceDataFormat = SourceFormatRGB8;
-            break;
-        case ALPHA:
-            sourceDataFormat = SourceFormatA8;
-            break;
-        case LUMINANCE:
-            sourceDataFormat = SourceFormatR8;
-            break;
-        case LUMINANCE_ALPHA:
-            sourceDataFormat = SourceFormatRA8;
-            break;
-        default:
-            ASSERT_NOT_REACHED();
-        }
-        break;
-    case FLOAT: // OES_texture_float
-        switch (format) {
-        case RGBA:
-            sourceDataFormat = SourceFormatRGBA32F;
-            break;
-        case RGB:
-            sourceDataFormat = SourceFormatRGB32F;
-            break;
-        case ALPHA:
-            sourceDataFormat = SourceFormatA32F;
-            break;
-        case LUMINANCE:
-            sourceDataFormat = SourceFormatR32F;
-            break;
-        case LUMINANCE_ALPHA:
-            sourceDataFormat = SourceFormatRA32F;
-            break;
-        default:
-            ASSERT_NOT_REACHED();
-        }
-        break;
-    case UNSIGNED_SHORT_5_5_5_1:
-        sourceDataFormat = SourceFormatRGBA5551;
-        break;
-    case UNSIGNED_SHORT_4_4_4_4:
-        sourceDataFormat = SourceFormatRGBA4444;
-        break;
-    case UNSIGNED_SHORT_5_6_5:
-        sourceDataFormat = SourceFormatRGB565;
-        break;
-    default:
-        ASSERT_NOT_REACHED();
-    }
+    DataFormat sourceDataFormat = getDataFormat(format, type);
 
     // Resize the output buffer.
     unsigned int componentsPerPixel, bytesPerComponent;
@@ -342,55 +309,29 @@ bool GraphicsContext3D::extractTextureData(unsigned int width, unsigned int heig
     unsigned int bytesPerPixel = componentsPerPixel * bytesPerComponent;
     data.resize(width * height * bytesPerPixel);
 
-    if (!packPixels(static_cast<const uint8_t*>(pixels),
-                    sourceDataFormat,
-                    width, height, unpackAlignment,
-                    format, type,
-                    (premultiplyAlpha ? AlphaDoPremultiply : AlphaDoNothing),
-                    data.data()))
+    if (!packPixels(static_cast<const uint8_t*>(pixels), sourceDataFormat, width, height, unpackAlignment, format, type, (premultiplyAlpha ? AlphaDoPremultiply : AlphaDoNothing), data.data(), flipY))
         return false;
-    // The pixel data is now tightly packed.
-    if (flipY)
-        flipVertically(data.data(), width, height, bytesPerPixel, 1);
+
     return true;
 }
 
-void GraphicsContext3D::flipVertically(void* imageData,
-                                       unsigned int width,
-                                       unsigned int height,
-                                       unsigned int bytesPerPixel,
-                                       unsigned int unpackAlignment)
-{
-    if (!width || !height)
-        return;
-    unsigned int validRowBytes = width * bytesPerPixel;
-    unsigned int totalRowBytes = validRowBytes;
-    unsigned int remainder = validRowBytes % unpackAlignment;
-    if (remainder)
-        totalRowBytes += (unpackAlignment - remainder);
-    uint8_t* tempRow = new uint8_t[validRowBytes];
-    uint8_t* data = static_cast<uint8_t*>(imageData);
-    for (unsigned i = 0; i < height / 2; i++) {
-        uint8_t* lowRow = data + (totalRowBytes * i);
-        uint8_t* highRow = data + (totalRowBytes * (height - i - 1));
-        memcpy(tempRow, lowRow, validRowBytes);
-        memcpy(lowRow, highRow, validRowBytes);
-        memcpy(highRow, tempRow, validRowBytes);
-    }
-    delete[] tempRow;
-}
-
-// The following packing and unpacking routines are expressed in terms
-// of line-by-line operations, and passed by function pointer rather
-// than template parameter, to achieve the majority of the speedups of
-// having them inlined while reducing code size.
-
 namespace {
 
+/* BEGIN CODE SHARED WITH MOZILLA FIREFOX */
+
+// The following packing and unpacking routines are expressed in terms of function templates and inline functions to achieve generality and speedup.
+// Explicit template specializations correspond to the cases that would occur.
+// Some code are merged back from Mozilla code in http://mxr.mozilla.org/mozilla-central/source/content/canvas/src/WebGLTexelConversions.h
+
 //----------------------------------------------------------------------
 // Pixel unpacking routines.
+template<int format, typename SourceType, typename DstType>
+ALWAYS_INLINE void unpack(const SourceType*, DstType*, unsigned)
+{
+    ASSERT_NOT_REACHED();
+}
 
-void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGBA16Little, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::unpackOneRowOfRGBA16LittleToRGBA8(source, destination, pixelsPerRow);
@@ -405,7 +346,7 @@ void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t* source, uint8_t* destinat
     }
 }
 
-void unpackOneRowOfRGBA16BigToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGBA16Big, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16BigTo8(source[0]);
@@ -417,7 +358,7 @@ void unpackOneRowOfRGBA16BigToRGBA8(const uint16_t* source, uint8_t* destination
     }
 }
 
-void unpackOneRowOfRGB8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGB8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -429,7 +370,7 @@ void unpackOneRowOfRGB8ToRGBA8(const uint8_t* source, uint8_t* destination, unsi
     }
 }
 
-void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGB16Little, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::unpackOneRowOfRGB16LittleToRGBA8(source, destination, pixelsPerRow);
@@ -444,7 +385,7 @@ void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t* source, uint8_t* destinati
     }
 }
 
-void unpackOneRowOfRGB16BigToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGB16Big, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16BigTo8(source[0]);
@@ -456,7 +397,7 @@ void unpackOneRowOfRGB16BigToRGBA8(const uint16_t* source, uint8_t* destination,
     }
 }
 
-void unpackOneRowOfBGR8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatBGR8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[2];
@@ -468,7 +409,7 @@ void unpackOneRowOfBGR8ToRGBA8(const uint8_t* source, uint8_t* destination, unsi
     }
 }
 
-void unpackOneRowOfARGB8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatARGB8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[1];
@@ -480,7 +421,7 @@ void unpackOneRowOfARGB8ToRGBA8(const uint8_t* source, uint8_t* destination, uns
     }
 }
 
-void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatARGB16Little, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::unpackOneRowOfARGB16LittleToRGBA8(source, destination, pixelsPerRow);
@@ -495,7 +436,7 @@ void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t* source, uint8_t* destinat
     }
 }
 
-void unpackOneRowOfARGB16BigToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatARGB16Big, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16BigTo8(source[1]);
@@ -507,7 +448,7 @@ void unpackOneRowOfARGB16BigToRGBA8(const uint16_t* source, uint8_t* destination
     }
 }
 
-void unpackOneRowOfABGR8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatABGR8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[3];
@@ -519,7 +460,7 @@ void unpackOneRowOfABGR8ToRGBA8(const uint8_t* source, uint8_t* destination, uns
     }
 }
 
-void unpackOneRowOfBGRA8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatBGRA8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     const uint32_t* source32 = reinterpret_cast_ptr<const uint32_t*>(source);
     uint32_t* destination32 = reinterpret_cast_ptr<uint32_t*>(destination);
@@ -537,7 +478,7 @@ void unpackOneRowOfBGRA8ToRGBA8(const uint8_t* source, uint8_t* destination, uns
     }
 }
 
-void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatBGRA16Little, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::unpackOneRowOfBGRA16LittleToRGBA8(source, destination, pixelsPerRow);
@@ -552,7 +493,7 @@ void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t* source, uint8_t* destinat
     }
 }
 
-void unpackOneRowOfBGRA16BigToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatBGRA16Big, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16BigTo8(source[2]);
@@ -564,7 +505,7 @@ void unpackOneRowOfBGRA16BigToRGBA8(const uint16_t* source, uint8_t* destination
     }
 }
 
-void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGBA5551, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::unpackOneRowOfRGBA5551ToRGBA8(source, destination, pixelsPerRow);
@@ -583,7 +524,7 @@ void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t* source, uint8_t* destination,
     }
 }
 
-void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGBA4444, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::unpackOneRowOfRGBA4444ToRGBA8(source, destination, pixelsPerRow);
@@ -603,7 +544,7 @@ void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t* source, uint8_t* destination,
     }
 }
 
-void unpackOneRowOfRGB565ToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGB565, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::unpackOneRowOfRGB565ToRGBA8(source, destination, pixelsPerRow);
@@ -622,7 +563,7 @@ void unpackOneRowOfRGB565ToRGBA8(const uint16_t* source, uint8_t* destination, u
     }
 }
 
-void unpackOneRowOfR8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatR8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -634,7 +575,7 @@ void unpackOneRowOfR8ToRGBA8(const uint8_t* source, uint8_t* destination, unsign
     }
 }
 
-void unpackOneRowOfR16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatR16Little, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16LittleTo8(source[0]);
@@ -646,7 +587,7 @@ void unpackOneRowOfR16LittleToRGBA8(const uint16_t* source, uint8_t* destination
     }
 }
 
-void unpackOneRowOfR16BigToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatR16Big, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16BigTo8(source[0]);
@@ -658,7 +599,7 @@ void unpackOneRowOfR16BigToRGBA8(const uint16_t* source, uint8_t* destination, u
     }
 }
 
-void unpackOneRowOfRA8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRA8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -670,7 +611,7 @@ void unpackOneRowOfRA8ToRGBA8(const uint8_t* source, uint8_t* destination, unsig
     }
 }
 
-void unpackOneRowOfRA16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRA16Little, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16LittleTo8(source[0]);
@@ -682,7 +623,7 @@ void unpackOneRowOfRA16LittleToRGBA8(const uint16_t* source, uint8_t* destinatio
     }
 }
 
-void unpackOneRowOfRA16BigToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRA16Big, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16BigTo8(source[0]);
@@ -694,7 +635,7 @@ void unpackOneRowOfRA16BigToRGBA8(const uint16_t* source, uint8_t* destination,
     }
 }
 
-void unpackOneRowOfAR8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatAR8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[1];
@@ -706,7 +647,7 @@ void unpackOneRowOfAR8ToRGBA8(const uint8_t* source, uint8_t* destination, unsig
     }
 }
 
-void unpackOneRowOfAR16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatAR16Little, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16LittleTo8(source[1]);
@@ -718,7 +659,7 @@ void unpackOneRowOfAR16LittleToRGBA8(const uint16_t* source, uint8_t* destinatio
     }
 }
 
-void unpackOneRowOfAR16BigToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatAR16Big, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16BigTo8(source[1]);
@@ -730,7 +671,7 @@ void unpackOneRowOfAR16BigToRGBA8(const uint16_t* source, uint8_t* destination,
     }
 }
 
-void unpackOneRowOfA8ToRGBA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatA8, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = 0x0;
@@ -742,7 +683,7 @@ void unpackOneRowOfA8ToRGBA8(const uint8_t* source, uint8_t* destination, unsign
     }
 }
 
-void unpackOneRowOfA16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatA16Little, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = 0x0;
@@ -754,7 +695,7 @@ void unpackOneRowOfA16LittleToRGBA8(const uint16_t* source, uint8_t* destination
     }
 }
 
-void unpackOneRowOfA16BigToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatA16Big, uint16_t, uint8_t>(const uint16_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = 0x0;
@@ -766,7 +707,7 @@ void unpackOneRowOfA16BigToRGBA8(const uint16_t* source, uint8_t* destination, u
     }
 }
 
-void unpackOneRowOfRGBA8ToRGBA32F(const uint8_t* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGBA8, uint8_t, float>(const uint8_t* source, float* destination, unsigned pixelsPerRow)
 {
     const float scaleFactor = 1.0f / 255.0f;
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
@@ -779,7 +720,7 @@ void unpackOneRowOfRGBA8ToRGBA32F(const uint8_t* source, float* destination, uns
     }
 }
 
-void unpackOneRowOfBGRA8ToRGBA32F(const uint8_t* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatBGRA8, uint8_t, float>(const uint8_t* source, float* destination, unsigned pixelsPerRow)
 {
     const float scaleFactor = 1.0f / 255.0f;
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
@@ -792,7 +733,7 @@ void unpackOneRowOfBGRA8ToRGBA32F(const uint8_t* source, float* destination, uns
     }
 }
 
-void unpackOneRowOfRGB32FToRGBA32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRGB32F, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -804,7 +745,7 @@ void unpackOneRowOfRGB32FToRGBA32F(const float* source, float* destination, unsi
     }
 }
 
-void unpackOneRowOfR32FToRGBA32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatR32F, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -816,7 +757,7 @@ void unpackOneRowOfR32FToRGBA32F(const float* source, float* destination, unsign
     }
 }
 
-void unpackOneRowOfRA32FToRGBA32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatRA32F, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -828,7 +769,7 @@ void unpackOneRowOfRA32FToRGBA32F(const float* source, float* destination, unsig
     }
 }
 
-void unpackOneRowOfA32FToRGBA32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void unpack<GraphicsContext3D::DataFormatA32F, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = 0;
@@ -844,7 +785,13 @@ void unpackOneRowOfA32FToRGBA32F(const float* source, float* destination, unsign
 // Pixel packing routines.
 //
 
-void packOneRowOfRGBA8ToA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<int format, int alphaOp, typename SourceType, typename DstType>
+ALWAYS_INLINE void pack(const SourceType*, DstType*, unsigned)
+{
+    ASSERT_NOT_REACHED();
+}
+
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatA8, GraphicsContext3D::AlphaDoNothing, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[3];
@@ -853,7 +800,7 @@ void packOneRowOfRGBA8ToA8(const uint8_t* source, uint8_t* destination, unsigned
     }
 }
 
-void packOneRowOfRGBA8ToR8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatR8, GraphicsContext3D::AlphaDoNothing, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -862,7 +809,7 @@ void packOneRowOfRGBA8ToR8(const uint8_t* source, uint8_t* destination, unsigned
     }
 }
 
-void packOneRowOfRGBA8ToR8Premultiply(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatR8, GraphicsContext3D::AlphaDoPremultiply, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] / 255.0f;
@@ -874,7 +821,7 @@ void packOneRowOfRGBA8ToR8Premultiply(const uint8_t* source, uint8_t* destinatio
 }
 
 // FIXME: this routine is lossy and must be removed.
-void packOneRowOfRGBA8ToR8Unmultiply(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatR8, GraphicsContext3D::AlphaDoUnmultiply, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = 1.0f / (source[3] ? source[3] / 255.0f : 1.0f);
@@ -885,7 +832,7 @@ void packOneRowOfRGBA8ToR8Unmultiply(const uint8_t* source, uint8_t* destination
     }
 }
 
-void packOneRowOfRGBA8ToRA8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRA8, GraphicsContext3D::AlphaDoNothing, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -895,7 +842,7 @@ void packOneRowOfRGBA8ToRA8(const uint8_t* source, uint8_t* destination, unsigne
     }
 }
 
-void packOneRowOfRGBA8ToRA8Premultiply(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRA8, GraphicsContext3D::AlphaDoPremultiply, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] / 255.0f;
@@ -908,7 +855,7 @@ void packOneRowOfRGBA8ToRA8Premultiply(const uint8_t* source, uint8_t* destinati
 }
 
 // FIXME: this routine is lossy and must be removed.
-void packOneRowOfRGBA8ToRA8Unmultiply(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRA8, GraphicsContext3D::AlphaDoUnmultiply, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = 1.0f / (source[3] ? source[3] / 255.0f : 1.0f);
@@ -920,7 +867,7 @@ void packOneRowOfRGBA8ToRA8Unmultiply(const uint8_t* source, uint8_t* destinatio
     }
 }
 
-void packOneRowOfRGBA8ToRGB8(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB8, GraphicsContext3D::AlphaDoNothing, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -931,7 +878,7 @@ void packOneRowOfRGBA8ToRGB8(const uint8_t* source, uint8_t* destination, unsign
     }
 }
 
-void packOneRowOfRGBA8ToRGB8Premultiply(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB8, GraphicsContext3D::AlphaDoPremultiply, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] / 255.0f;
@@ -947,7 +894,7 @@ void packOneRowOfRGBA8ToRGB8Premultiply(const uint8_t* source, uint8_t* destinat
 }
 
 // FIXME: this routine is lossy and must be removed.
-void packOneRowOfRGBA8ToRGB8Unmultiply(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB8, GraphicsContext3D::AlphaDoUnmultiply, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = 1.0f / (source[3] ? source[3] / 255.0f : 1.0f);
@@ -962,7 +909,13 @@ void packOneRowOfRGBA8ToRGB8Unmultiply(const uint8_t* source, uint8_t* destinati
     }
 }
 
-void packOneRowOfRGBA8ToRGBA8Premultiply(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA8, GraphicsContext3D::AlphaDoNothing, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
+{
+    memcpy(destination, source, pixelsPerRow * 4);
+}
+
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA8, GraphicsContext3D::AlphaDoPremultiply, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] / 255.0f;
@@ -979,7 +932,7 @@ void packOneRowOfRGBA8ToRGBA8Premultiply(const uint8_t* source, uint8_t* destina
 }
 
 // FIXME: this routine is lossy and must be removed.
-void packOneRowOfRGBA8ToRGBA8Unmultiply(const uint8_t* source, uint8_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA8, GraphicsContext3D::AlphaDoUnmultiply, uint8_t, uint8_t>(const uint8_t* source, uint8_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = 1.0f / (source[3] ? source[3] / 255.0f : 1.0f);
@@ -995,7 +948,7 @@ void packOneRowOfRGBA8ToRGBA8Unmultiply(const uint8_t* source, uint8_t* destinat
     }
 }
 
-void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA4444, GraphicsContext3D::AlphaDoNothing, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::packOneRowOfRGBA8ToUnsignedShort4444(source, destination, pixelsPerRow);
@@ -1010,7 +963,7 @@ void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t* source, uint16_t* desti
     }
 }
 
-void packOneRowOfRGBA8ToUnsignedShort4444Premultiply(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA4444, GraphicsContext3D::AlphaDoPremultiply, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] / 255.0f;
@@ -1027,7 +980,7 @@ void packOneRowOfRGBA8ToUnsignedShort4444Premultiply(const uint8_t* source, uint
 }
 
 // FIXME: this routine is lossy and must be removed.
-void packOneRowOfRGBA8ToUnsignedShort4444Unmultiply(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA4444, GraphicsContext3D::AlphaDoUnmultiply, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = 1.0f / (source[3] ? source[3] / 255.0f : 1.0f);
@@ -1043,7 +996,7 @@ void packOneRowOfRGBA8ToUnsignedShort4444Unmultiply(const uint8_t* source, uint1
     }
 }
 
-void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA5551, GraphicsContext3D::AlphaDoNothing, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::packOneRowOfRGBA8ToUnsignedShort5551(source, destination, pixelsPerRow);
@@ -1058,7 +1011,7 @@ void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t* source, uint16_t* desti
     }
 }
 
-void packOneRowOfRGBA8ToUnsignedShort5551Premultiply(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA5551, GraphicsContext3D::AlphaDoPremultiply, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] / 255.0f;
@@ -1075,7 +1028,7 @@ void packOneRowOfRGBA8ToUnsignedShort5551Premultiply(const uint8_t* source, uint
 }
 
 // FIXME: this routine is lossy and must be removed.
-void packOneRowOfRGBA8ToUnsignedShort5551Unmultiply(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA5551, GraphicsContext3D::AlphaDoUnmultiply, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = 1.0f / (source[3] ? source[3] / 255.0f : 1.0f);
@@ -1091,7 +1044,7 @@ void packOneRowOfRGBA8ToUnsignedShort5551Unmultiply(const uint8_t* source, uint1
     }
 }
 
-void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB565, GraphicsContext3D::AlphaDoNothing, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
 #if HAVE(ARM_NEON_INTRINSICS)
     SIMD::packOneRowOfRGBA8ToUnsignedShort565(source, destination, pixelsPerRow);
@@ -1105,7 +1058,7 @@ void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t* source, uint16_t* destin
     }
 }
 
-void packOneRowOfRGBA8ToUnsignedShort565Premultiply(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB565, GraphicsContext3D::AlphaDoPremultiply, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] / 255.0f;
@@ -1121,7 +1074,7 @@ void packOneRowOfRGBA8ToUnsignedShort565Premultiply(const uint8_t* source, uint1
 }
 
 // FIXME: this routine is lossy and must be removed.
-void packOneRowOfRGBA8ToUnsignedShort565Unmultiply(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB565, GraphicsContext3D::AlphaDoUnmultiply, uint8_t, uint16_t>(const uint8_t* source, uint16_t* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = 1.0f / (source[3] ? source[3] / 255.0f : 1.0f);
@@ -1136,7 +1089,7 @@ void packOneRowOfRGBA8ToUnsignedShort565Unmultiply(const uint8_t* source, uint16
     }
 }
 
-void packOneRowOfRGBA32FToRGB32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB32F, GraphicsContext3D::AlphaDoNothing, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -1147,7 +1100,7 @@ void packOneRowOfRGBA32FToRGB32F(const float* source, float* destination, unsign
     }
 }
 
-void packOneRowOfRGBA32FToRGB32FPremultiply(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB32F, GraphicsContext3D::AlphaDoPremultiply, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3];
@@ -1159,7 +1112,7 @@ void packOneRowOfRGBA32FToRGB32FPremultiply(const float* source, float* destinat
     }
 }
 
-void packOneRowOfRGBA32FToRGB32FUnmultiply(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGB32F, GraphicsContext3D::AlphaDoUnmultiply, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] ? 1.0f / source[3] : 1.0f;
@@ -1172,7 +1125,7 @@ void packOneRowOfRGBA32FToRGB32FUnmultiply(const float* source, float* destinati
 }
 
 // Used only during RGBA8 or BGRA8 -> floating-point uploads.
-void packOneRowOfRGBA32FToRGBA32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA32F, GraphicsContext3D::AlphaDoNothing, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -1184,7 +1137,7 @@ void packOneRowOfRGBA32FToRGBA32F(const float* source, float* destination, unsig
     }
 }
 
-void packOneRowOfRGBA32FToRGBA32FPremultiply(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA32F, GraphicsContext3D::AlphaDoPremultiply, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3];
@@ -1197,7 +1150,7 @@ void packOneRowOfRGBA32FToRGBA32FPremultiply(const float* source, float* destina
     }
 }
 
-void packOneRowOfRGBA32FToRGBA32FUnmultiply(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRGBA32F, GraphicsContext3D::AlphaDoUnmultiply, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] ? 1.0f / source[3] : 1.0f;
@@ -1210,7 +1163,7 @@ void packOneRowOfRGBA32FToRGBA32FUnmultiply(const float* source, float* destinat
     }
 }
 
-void packOneRowOfRGBA32FToA32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatA32F, GraphicsContext3D::AlphaDoNothing, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[3];
@@ -1219,7 +1172,7 @@ void packOneRowOfRGBA32FToA32F(const float* source, float* destination, unsigned
     }
 }
 
-void packOneRowOfRGBA32FToR32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatR32F, GraphicsContext3D::AlphaDoNothing, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -1228,7 +1181,7 @@ void packOneRowOfRGBA32FToR32F(const float* source, float* destination, unsigned
     }
 }
 
-void packOneRowOfRGBA32FToR32FPremultiply(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatR32F, GraphicsContext3D::AlphaDoPremultiply, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3];
@@ -1238,7 +1191,7 @@ void packOneRowOfRGBA32FToR32FPremultiply(const float* source, float* destinatio
     }
 }
 
-void packOneRowOfRGBA32FToR32FUnmultiply(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatR32F, GraphicsContext3D::AlphaDoUnmultiply, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] ? 1.0f / source[3] : 1.0f;
@@ -1248,7 +1201,7 @@ void packOneRowOfRGBA32FToR32FUnmultiply(const float* source, float* destination
     }
 }
 
-void packOneRowOfRGBA32FToRA32F(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRA32F, GraphicsContext3D::AlphaDoNothing, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = source[0];
@@ -1258,7 +1211,7 @@ void packOneRowOfRGBA32FToRA32F(const float* source, float* destination, unsigne
     }
 }
 
-void packOneRowOfRGBA32FToRA32FPremultiply(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRA32F, GraphicsContext3D::AlphaDoPremultiply, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3];
@@ -1269,7 +1222,7 @@ void packOneRowOfRGBA32FToRA32FPremultiply(const float* source, float* destinati
     }
 }
 
-void packOneRowOfRGBA32FToRA32FUnmultiply(const float* source, float* destination, unsigned int pixelsPerRow)
+template<> ALWAYS_INLINE void pack<GraphicsContext3D::DataFormatRA32F, GraphicsContext3D::AlphaDoUnmultiply, float, float>(const float* source, float* destination, unsigned pixelsPerRow)
 {
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         float scaleFactor = source[3] ? 1.0f / source[3] : 1.0f;
@@ -1280,545 +1233,403 @@ void packOneRowOfRGBA32FToRA32FUnmultiply(const float* source, float* destinatio
     }
 }
 
-} // anonymous namespace
+ALWAYS_INLINE bool HasAlpha(int format)
+{
+    return format == GraphicsContext3D::DataFormatA8
+        || format == GraphicsContext3D::DataFormatA16Little
+        || format == GraphicsContext3D::DataFormatA16Big
+        || format == GraphicsContext3D::DataFormatA32F
+        || format == GraphicsContext3D::DataFormatRA8
+        || format == GraphicsContext3D::DataFormatAR8
+        || format == GraphicsContext3D::DataFormatRA16Little
+        || format == GraphicsContext3D::DataFormatRA16Big
+        || format == GraphicsContext3D::DataFormatAR16Little
+        || format == GraphicsContext3D::DataFormatAR16Big
+        || format == GraphicsContext3D::DataFormatRA32F
+        || format == GraphicsContext3D::DataFormatRGBA8
+        || format == GraphicsContext3D::DataFormatRGBA16Little
+        || format == GraphicsContext3D::DataFormatRGBA16Big
+        || format == GraphicsContext3D::DataFormatBGRA8
+        || format == GraphicsContext3D::DataFormatBGRA16Little
+        || format == GraphicsContext3D::DataFormatBGRA16Big
+        || format == GraphicsContext3D::DataFormatARGB8
+        || format == GraphicsContext3D::DataFormatARGB16Little
+        || format == GraphicsContext3D::DataFormatARGB16Big
+        || format == GraphicsContext3D::DataFormatABGR8
+        || format == GraphicsContext3D::DataFormatRGBA32F
+        || format == GraphicsContext3D::DataFormatRGBA4444
+        || format == GraphicsContext3D::DataFormatRGBA5551;
+}
+
+ALWAYS_INLINE bool HasColor(int format)
+{
+    return format == GraphicsContext3D::DataFormatRGBA8
+        || format == GraphicsContext3D::DataFormatRGBA16Little
+        || format == GraphicsContext3D::DataFormatRGBA16Big
+        || format == GraphicsContext3D::DataFormatRGBA32F
+        || format == GraphicsContext3D::DataFormatRGB8
+        || format == GraphicsContext3D::DataFormatRGB16Little
+        || format == GraphicsContext3D::DataFormatRGB16Big
+        || format == GraphicsContext3D::DataFormatRGB32F
+        || format == GraphicsContext3D::DataFormatBGR8
+        || format == GraphicsContext3D::DataFormatBGRA8
+        || format == GraphicsContext3D::DataFormatBGRA16Little
+        || format == GraphicsContext3D::DataFormatBGRA16Big
+        || format == GraphicsContext3D::DataFormatARGB8
+        || format == GraphicsContext3D::DataFormatARGB16Little
+        || format == GraphicsContext3D::DataFormatARGB16Big
+        || format == GraphicsContext3D::DataFormatABGR8
+        || format == GraphicsContext3D::DataFormatRGBA5551
+        || format == GraphicsContext3D::DataFormatRGBA4444
+        || format == GraphicsContext3D::DataFormatRGB565
+        || format == GraphicsContext3D::DataFormatR8
+        || format == GraphicsContext3D::DataFormatR16Little
+        || format == GraphicsContext3D::DataFormatR16Big
+        || format == GraphicsContext3D::DataFormatR32F
+        || format == GraphicsContext3D::DataFormatRA8
+        || format == GraphicsContext3D::DataFormatRA16Little
+        || format == GraphicsContext3D::DataFormatRA16Big
+        || format == GraphicsContext3D::DataFormatRA32F
+        || format == GraphicsContext3D::DataFormatAR8
+        || format == GraphicsContext3D::DataFormatAR16Little
+        || format == GraphicsContext3D::DataFormatAR16Big;
+}
+
+template<int Format>
+struct IsFloatFormat {
+    static const bool Value =
+        Format == GraphicsContext3D::DataFormatRGBA32F
+        || Format == GraphicsContext3D::DataFormatRGB32F
+        || Format == GraphicsContext3D::DataFormatRA32F
+        || Format == GraphicsContext3D::DataFormatR32F
+        || Format == GraphicsContext3D::DataFormatA32F;
+};
+
+template<int Format>
+struct Is16bppFormat {
+    static const bool Value =
+        Format == GraphicsContext3D::DataFormatRGBA16Little
+        || Format == GraphicsContext3D::DataFormatRGBA16Big
+        || Format == GraphicsContext3D::DataFormatRGB16Little
+        || Format == GraphicsContext3D::DataFormatRGB16Big
+        || Format == GraphicsContext3D::DataFormatBGRA16Little
+        || Format == GraphicsContext3D::DataFormatBGRA16Big
+        || Format == GraphicsContext3D::DataFormatARGB16Little
+        || Format == GraphicsContext3D::DataFormatARGB16Big
+        || Format == GraphicsContext3D::DataFormatRGBA5551
+        || Format == GraphicsContext3D::DataFormatRGBA4444
+        || Format == GraphicsContext3D::DataFormatRGB565
+        || Format == GraphicsContext3D::DataFormatR16Little
+        || Format == GraphicsContext3D::DataFormatR16Big
+        || Format == GraphicsContext3D::DataFormatRA16Little
+        || Format == GraphicsContext3D::DataFormatRA16Big
+        || Format == GraphicsContext3D::DataFormatAR16Little
+        || Format == GraphicsContext3D::DataFormatAR16Big
+        || Format == GraphicsContext3D::DataFormatA16Little
+        || Format == GraphicsContext3D::DataFormatA16Big;
+};
+
+template<int Format, bool IsFloat = IsFloatFormat<Format>::Value, bool Is16bpp = Is16bppFormat<Format>::Value>
+struct DataTypeForFormat {
+    typedef uint8_t Type;
+};
+
+template<int Format>
+struct DataTypeForFormat<Format, true, false> {
+    typedef float Type;
+};
+
+template<int Format>
+struct DataTypeForFormat<Format, false, true> {
+    typedef uint16_t Type;
+};
+
+template<int Format>
+struct IntermediateFormat {
+    static const int Value = IsFloatFormat<Format>::Value ? GraphicsContext3D::DataFormatRGBA32F : GraphicsContext3D::DataFormatRGBA8;
+};
+
+ALWAYS_INLINE unsigned TexelBytesForFormat(GraphicsContext3D::DataFormat format)
+{
+    switch (format) {
+    case GraphicsContext3D::DataFormatR8:
+    case GraphicsContext3D::DataFormatA8:
+        return 1;
+    case GraphicsContext3D::DataFormatRA8:
+    case GraphicsContext3D::DataFormatAR8:
+    case GraphicsContext3D::DataFormatRGBA5551:
+    case GraphicsContext3D::DataFormatRGBA4444:
+    case GraphicsContext3D::DataFormatRGB565:
+    case GraphicsContext3D::DataFormatR16Little:
+    case GraphicsContext3D::DataFormatR16Big:
+    case GraphicsContext3D::DataFormatA16Little:
+    case GraphicsContext3D::DataFormatA16Big:
+        return 2;
+    case GraphicsContext3D::DataFormatRGB8:
+    case GraphicsContext3D::DataFormatBGR8:
+        return 3;
+    case GraphicsContext3D::DataFormatRGBA8:
+    case GraphicsContext3D::DataFormatARGB8:
+    case GraphicsContext3D::DataFormatABGR8:
+    case GraphicsContext3D::DataFormatRA16Little:
+    case GraphicsContext3D::DataFormatRA16Big:
+    case GraphicsContext3D::DataFormatBGRA8:
+    case GraphicsContext3D::DataFormatR32F:
+    case GraphicsContext3D::DataFormatA32F:
+    case GraphicsContext3D::DataFormatAR16Little:
+    case GraphicsContext3D::DataFormatAR16Big:
+        return 4;
+    case GraphicsContext3D::DataFormatRGB16Little:
+    case GraphicsContext3D::DataFormatRGB16Big:
+        return 6;
+    case GraphicsContext3D::DataFormatRA32F:
+    case GraphicsContext3D::DataFormatRGBA16Little:
+    case GraphicsContext3D::DataFormatRGBA16Big:
+    case GraphicsContext3D::DataFormatBGRA16Little:
+    case GraphicsContext3D::DataFormatBGRA16Big:
+    case GraphicsContext3D::DataFormatARGB16Little:
+    case GraphicsContext3D::DataFormatARGB16Big:
+        return 8;
+    case GraphicsContext3D::DataFormatRGB32F:
+        return 12;
+    case GraphicsContext3D::DataFormatRGBA32F:
+        return 16;
+    default:
+        return 0;
+    }
+}
 
-// This is used whenever unpacking is necessary; i.e., the source data
-// is not in RGBA8/RGBA32F format, or the unpack alignment specifies
-// that rows are not tightly packed.
-template<typename SourceType, typename IntermediateType, typename DestType>
-static void doUnpackingAndPacking(const SourceType* sourceData,
-                                  void (*rowUnpackingFunc)(const SourceType*, IntermediateType*, unsigned int),
-                                  unsigned int width,
-                                  unsigned int height,
-                                  unsigned int sourceElementsPerRow,
-                                  DestType* destinationData,
-                                  void (*rowPackingFunc)(const IntermediateType*, DestType*, unsigned int),
-                                  unsigned int destinationElementsPerPixel)
-{
-    if (!rowPackingFunc) {
-        // The row packing is trivial, so don't bother with a temporary buffer.
-        const SourceType* endPointer = sourceData + height * sourceElementsPerRow;
-        unsigned int destinationElementsPerRow = width * destinationElementsPerPixel;
-        while (sourceData < endPointer) {
-            rowUnpackingFunc(sourceData, reinterpret_cast<IntermediateType*>(destinationData), width);
-            sourceData += sourceElementsPerRow;
-            destinationData += destinationElementsPerRow;
+/* END CODE SHARED WITH MOZILLA FIREFOX */
+
+class FormatConverter {
+public:
+    FormatConverter(unsigned width, unsigned height,
+        const void* srcStart, void* dstStart, int srcStride, int dstStride)
+        : m_width(width), m_height(height), m_srcStart(srcStart), m_dstStart(dstStart), m_srcStride(srcStride), m_dstStride(dstStride), m_success(false)
+    {
+        const unsigned MaxNumberOfComponents = 4;
+        const unsigned MaxBytesPerComponent  = 4;
+        unpackedIntermediateSrcData = adoptArrayPtr(new uint8_t[m_width * MaxNumberOfComponents *MaxBytesPerComponent]);
+        ASSERT(unpackedIntermediateSrcData.get());
+    }
+
+    void convert(GraphicsContext3D::DataFormat srcFormat, GraphicsContext3D::DataFormat dstFormat, GraphicsContext3D::AlphaOp);
+    bool Success() const { return m_success; }
+
+private:
+    template<GraphicsContext3D::DataFormat SrcFormat>
+    ALWAYS_INLINE void convert(GraphicsContext3D::DataFormat dstFormat, GraphicsContext3D::AlphaOp);
+
+    template<GraphicsContext3D::DataFormat SrcFormat, GraphicsContext3D::DataFormat DstFormat>
+    ALWAYS_INLINE void convert(GraphicsContext3D::AlphaOp);
+
+    template<GraphicsContext3D::DataFormat SrcFormat, GraphicsContext3D::DataFormat DstFormat, GraphicsContext3D::AlphaOp alphaOp>
+    ALWAYS_INLINE void convert();
+
+    const unsigned m_width, m_height;
+    const void* const m_srcStart;
+    void* const m_dstStart;
+    const int m_srcStride, m_dstStride;
+    bool m_success;
+    OwnArrayPtr<uint8_t> unpackedIntermediateSrcData;
+};
+
+void FormatConverter::convert(GraphicsContext3D::DataFormat srcFormat, GraphicsContext3D::DataFormat dstFormat, GraphicsContext3D::AlphaOp alphaOp)
+{
+#define FORMATCONVERTER_CASE_SRCFORMAT(SrcFormat) \
+    case SrcFormat: \
+        return convert<SrcFormat>(dstFormat, alphaOp);
+
+        switch (srcFormat) {
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatR8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatA8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatR32F)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatA32F)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRA8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRA32F)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGB8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatBGR8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGB565)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGB32F)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGBA8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGBA16Little)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGBA16Big)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGB16Little)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGB16Big)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatBGRA16Little)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatBGRA16Big)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatARGB8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatARGB16Little)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatARGB16Big)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatABGR8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatAR8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatR16Little)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatR16Big)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRA16Little)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRA16Big)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatAR16Little)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatAR16Big)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatA16Little)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatA16Big)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatBGRA8)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGBA5551)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGBA4444)
+            FORMATCONVERTER_CASE_SRCFORMAT(GraphicsContext3D::DataFormatRGBA32F)
+        default:
+            ASSERT_NOT_REACHED();
         }
-    } else {
-        OwnArrayPtr<IntermediateType> temporaryRGBAData = adoptArrayPtr(new IntermediateType[width * 4]);
-        const SourceType* endPointer = sourceData + height * sourceElementsPerRow;
-        unsigned int destinationElementsPerRow = width * destinationElementsPerPixel;
-        while (sourceData < endPointer) {
-            rowUnpackingFunc(sourceData, temporaryRGBAData.get(), width);
-            rowPackingFunc(temporaryRGBAData.get(), destinationData, width);
-            sourceData += sourceElementsPerRow;
-            destinationData += destinationElementsPerRow;
+#undef FORMATCONVERTER_CASE_SRCFORMAT
+}
+
+template<GraphicsContext3D::DataFormat SrcFormat>
+ALWAYS_INLINE void FormatConverter::convert(GraphicsContext3D::DataFormat dstFormat, GraphicsContext3D::AlphaOp alphaOp)
+{
+#define FORMATCONVERTER_CASE_DSTFORMAT(DstFormat) \
+    case DstFormat: \
+        return convert<SrcFormat, DstFormat>(alphaOp);
+
+        switch (dstFormat) {
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatR8)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatA8)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatR32F)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatA32F)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRA8)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRA32F)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRGB8)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRGB565)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRGB32F)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRGBA8)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRGBA5551)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRGBA4444)
+            FORMATCONVERTER_CASE_DSTFORMAT(GraphicsContext3D::DataFormatRGBA32F)
+        default:
+            ASSERT_NOT_REACHED();
         }
-    }
+
+#undef FORMATCONVERTER_CASE_DSTFORMAT
 }
 
-template<typename SourceType>
-static unsigned int computeSourceElementsPerRow(unsigned int width,
-                                                unsigned int bytesPerPixel,
-                                                unsigned int unpackAlignment)
-{
-    unsigned int elementSizeInBytes = sizeof(SourceType);
-    ASSERT(elementSizeInBytes <= bytesPerPixel);
-    ASSERT(!(bytesPerPixel % elementSizeInBytes));
-    unsigned int validRowBytes = width * bytesPerPixel;
-    unsigned int totalRowBytes = validRowBytes;
-    if (unpackAlignment) {
-        unsigned int remainder = validRowBytes % unpackAlignment;
-        if (remainder)
-            totalRowBytes += (unpackAlignment - remainder);
-    }
-    return totalRowBytes / elementSizeInBytes;
-}
-
-// This handles all conversions with a faster path for tightly packed RGBA8 source data.
-template<typename DestType>
-static void doPacking(const void* sourceData,
-                      GraphicsContext3D::SourceDataFormat sourceDataFormat,
-                      unsigned int width,
-                      unsigned int height,
-                      unsigned int sourceUnpackAlignment,
-                      DestType* destinationData,
-                      void (*rowPackingFunc)(const uint8_t*, DestType*, unsigned int),
-                      unsigned int destinationElementsPerPixel)
-{
-    switch (sourceDataFormat) {
-    case GraphicsContext3D::SourceFormatRGBA8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 4, sourceUnpackAlignment);
-        const uint8_t* source = static_cast<const uint8_t*>(sourceData);
-        const uint8_t* endPointer = source + height * sourceElementsPerRow;
-        unsigned int destinationElementsPerRow = width * destinationElementsPerPixel;
-        while (source < endPointer) {
-            if (rowPackingFunc)
-                rowPackingFunc(source, destinationData, width);
-            else
-                memcpy(destinationData, source, width * 4);
-            source += sourceElementsPerRow;
-            destinationData += destinationElementsPerRow;
+template<GraphicsContext3D::DataFormat SrcFormat, GraphicsContext3D::DataFormat DstFormat>
+ALWAYS_INLINE void FormatConverter::convert(GraphicsContext3D::AlphaOp alphaOp)
+{
+#define FORMATCONVERTER_CASE_ALPHAOP(alphaOp) \
+    case alphaOp: \
+        return convert<SrcFormat, DstFormat, alphaOp>();
+
+        switch (alphaOp) {
+            FORMATCONVERTER_CASE_ALPHAOP(GraphicsContext3D::AlphaDoNothing)
+            FORMATCONVERTER_CASE_ALPHAOP(GraphicsContext3D::AlphaDoPremultiply)
+            FORMATCONVERTER_CASE_ALPHAOP(GraphicsContext3D::AlphaDoUnmultiply)
+        default:
+            ASSERT_NOT_REACHED();
         }
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGBA16Little: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 8, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRGBA16LittleToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGBA16Big: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 8, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRGBA16BigToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGB8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 3, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfRGB8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGB16Little: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 6, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRGB16LittleToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGB16Big: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 6, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRGB16BigToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatBGR8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 3, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfBGR8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatARGB8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfARGB8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatARGB16Little: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 8, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfARGB16LittleToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatARGB16Big: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 8, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfARGB16BigToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatABGR8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfABGR8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatBGRA8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfBGRA8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatBGRA16Little: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 8, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfBGRA16LittleToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatBGRA16Big: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 8, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfBGRA16BigToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGBA5551: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRGBA5551ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGBA4444: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRGBA4444ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGB565: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRGB565ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatR8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 1, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfR8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatR16Little: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfR16LittleToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatR16Big: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfR16BigToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRA8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfRA8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRA16Little: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRA16LittleToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRA16Big: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfRA16BigToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatAR8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfAR8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatAR16Little: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfAR16LittleToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
+#undef FORMATCONVERTER_CASE_ALPHAOP
+}
+
+template<GraphicsContext3D::DataFormat SrcFormat, GraphicsContext3D::DataFormat DstFormat, GraphicsContext3D::AlphaOp alphaOp>
+ALWAYS_INLINE void FormatConverter::convert()
+{
+    // Many instantiations of this template function will never be entered, so we try
+    // to return immediately in these cases to avoid the compiler to generate useless code.
+    if (SrcFormat == DstFormat && alphaOp == GraphicsContext3D::AlphaDoNothing) {
+        ASSERT_NOT_REACHED();
+        return;
     }
-    case GraphicsContext3D::SourceFormatAR16Big: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfAR16BigToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
+    if (IsFloatFormat<DstFormat>::Value && !IsFloatFormat<SrcFormat>::Value && SrcFormat != GraphicsContext3D::DataFormatRGBA8 && SrcFormat != GraphicsContext3D::DataFormatBGRA8) {
+        ASSERT_NOT_REACHED();
+        return;
     }
-    case GraphicsContext3D::SourceFormatA8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 1, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, uint8_t, DestType>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfA8ToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
+    if (!IsFloatFormat<DstFormat>::Value && IsFloatFormat<SrcFormat>::Value) {
+        ASSERT_NOT_REACHED();
+        return;
     }
-    case GraphicsContext3D::SourceFormatA16Little: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfA16LittleToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
+
+    // Only textures uploaded from DOM elements or ImageData can allow DstFormat != SrcFormat.
+    const bool srcFormatComesFromDOMElementOrImageData = GraphicsContext3D::srcFormatComeFromDOMElementOrImageData(SrcFormat);
+    if (!srcFormatComesFromDOMElementOrImageData && SrcFormat != DstFormat) {
+        ASSERT_NOT_REACHED();
+        return;
     }
-    case GraphicsContext3D::SourceFormatA16Big: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint16_t>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint16_t, uint8_t, DestType>(static_cast<const uint16_t*>(sourceData), unpackOneRowOfA16BigToRGBA8, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
+    // Likewise, only textures uploaded from DOM elements or ImageData can possibly have to be unpremultiplied.
+    if (!srcFormatComesFromDOMElementOrImageData && alphaOp == GraphicsContext3D::AlphaDoUnmultiply) {
+        ASSERT_NOT_REACHED();
+        return;
     }
-    default:
+    if ((!HasAlpha(SrcFormat) || !HasColor(SrcFormat) || !HasColor(DstFormat)) && alphaOp != GraphicsContext3D::AlphaDoNothing) {
         ASSERT_NOT_REACHED();
+        return;
     }
-}
 
-// This specialized routine is used only for floating-point texture uploads. It
-// does not need to be as general as doPacking, above; because there are
-// currently no native floating-point image formats in WebKit, there are only a
-// few upload paths.
-static void doFloatingPointPacking(const void* sourceData,
-                                   GraphicsContext3D::SourceDataFormat sourceDataFormat,
-                                   unsigned int width,
-                                   unsigned int height,
-                                   unsigned int sourceUnpackAlignment,
-                                   float* destinationData,
-                                   void rowPackingFunc(const float*, float*, unsigned int),
-                                   unsigned int destinationElementsPerPixel)
-{
-    switch (sourceDataFormat) {
-    case GraphicsContext3D::SourceFormatRGBA8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, float, float>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfRGBA8ToRGBA32F, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatBGRA8: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<uint8_t>(width, 4, sourceUnpackAlignment);
-        doUnpackingAndPacking<uint8_t, float, float>(static_cast<const uint8_t*>(sourceData), unpackOneRowOfBGRA8ToRGBA32F, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGBA32F: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<float>(width, 4, sourceUnpackAlignment);
-        const float* source = static_cast<const float*>(sourceData);
-        const float* endPointer = source + height * sourceElementsPerRow;
-        unsigned int destinationElementsPerRow = width * destinationElementsPerPixel;
-        while (source < endPointer) {
-            rowPackingFunc(source, destinationData, width);
-            source += sourceElementsPerRow;
-            destinationData += destinationElementsPerRow;
+    typedef typename DataTypeForFormat<SrcFormat>::Type SrcType;
+    typedef typename DataTypeForFormat<DstFormat>::Type DstType;
+    const int IntermediateSrcFormat = IntermediateFormat<DstFormat>::Value;
+    typedef typename DataTypeForFormat<IntermediateSrcFormat>::Type IntermediateSrcType;
+    const ptrdiff_t srcStrideInElements = m_srcStride / sizeof(SrcType);
+    const ptrdiff_t dstStrideInElements = m_dstStride / sizeof(DstType);
+    const bool trivialUnpack = (SrcFormat == GraphicsContext3D::DataFormatRGBA8 && !IsFloatFormat<DstFormat>::Value) || SrcFormat == GraphicsContext3D::DataFormatRGBA32F;
+    const bool trivialPack = (DstFormat == GraphicsContext3D::DataFormatRGBA8 || DstFormat == GraphicsContext3D::DataFormatRGBA32F) && alphaOp == GraphicsContext3D::AlphaDoNothing && m_dstStride > 0;
+    ASSERT(!trivialUnpack || !trivialPack);
+
+    const SrcType *srcRowStart = static_cast<const SrcType*>(m_srcStart);
+    DstType* dstRowStart = static_cast<DstType*>(m_dstStart);
+    if (!trivialUnpack && trivialPack) {
+        for (size_t i = 0; i < m_height; ++i) {
+            unpack<SrcFormat>(srcRowStart, dstRowStart, m_width);
+            srcRowStart += srcStrideInElements;
+            dstRowStart += dstStrideInElements;
+        }
+    } else if (!trivialUnpack && !trivialPack) {
+        for (size_t i = 0; i < m_height; ++i) {
+            unpack<SrcFormat>(srcRowStart, reinterpret_cast<IntermediateSrcType*>(unpackedIntermediateSrcData.get()), m_width);
+            pack<DstFormat, alphaOp>(reinterpret_cast<IntermediateSrcType*>(unpackedIntermediateSrcData.get()), dstRowStart, m_width);
+            srcRowStart += srcStrideInElements;
+            dstRowStart += dstStrideInElements;
+        }
+    } else {
+        for (size_t i = 0; i < m_height; ++i) {
+            pack<DstFormat, alphaOp>(srcRowStart, dstRowStart, m_width);
+            srcRowStart += srcStrideInElements;
+            dstRowStart += dstStrideInElements;
         }
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRGB32F: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<float>(width, 3, sourceUnpackAlignment);
-        doUnpackingAndPacking<float, float, float>(static_cast<const float*>(sourceData), unpackOneRowOfRGB32FToRGBA32F, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatR32F: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<float>(width, 1, sourceUnpackAlignment);
-        doUnpackingAndPacking<float, float, float>(static_cast<const float*>(sourceData), unpackOneRowOfR32FToRGBA32F, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatRA32F: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<float>(width, 2, sourceUnpackAlignment);
-        doUnpackingAndPacking<float, float, float>(static_cast<const float*>(sourceData), unpackOneRowOfRA32FToRGBA32F, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    case GraphicsContext3D::SourceFormatA32F: {
-        unsigned int sourceElementsPerRow = computeSourceElementsPerRow<float>(width, 1, sourceUnpackAlignment);
-        doUnpackingAndPacking<float, float, float>(static_cast<const float*>(sourceData), unpackOneRowOfA32FToRGBA32F, width, height, sourceElementsPerRow, destinationData, rowPackingFunc, destinationElementsPerPixel);
-        break;
-    }
-    default:
-        ASSERT_NOT_REACHED();
     }
+    m_success = true;
+    return;
 }
 
+} // anonymous namespace
 
-#if !ASSERT_DISABLED
-static bool isFloatingPointSource(GraphicsContext3D::SourceDataFormat format)
+bool GraphicsContext3D::packPixels(const uint8_t* sourceData, DataFormat sourceDataFormat, unsigned width, unsigned height, unsigned sourceUnpackAlignment, unsigned destinationFormat, unsigned destinationType, AlphaOp alphaOp, void* destinationData, bool flipY)
 {
-    switch (format) {
-    case GraphicsContext3D::SourceFormatRGBA32F:
-    case GraphicsContext3D::SourceFormatRGB32F:
-    case GraphicsContext3D::SourceFormatRA32F:
-    case GraphicsContext3D::SourceFormatR32F:
-    case GraphicsContext3D::SourceFormatA32F:
-        return true;
-    default:
-        return false;
-    }
-}
-#endif
+    int validSrc = width * TexelBytesForFormat(sourceDataFormat);
+    int remainder = sourceUnpackAlignment ? (validSrc % sourceUnpackAlignment) : 0;
+    int srcStride = remainder ? (validSrc + sourceUnpackAlignment - remainder) : validSrc;
 
-bool GraphicsContext3D::packPixels(const uint8_t* sourceData,
-                                   GraphicsContext3D::SourceDataFormat sourceDataFormat,
-                                   unsigned int width,
-                                   unsigned int height,
-                                   unsigned int sourceUnpackAlignment,
-                                   unsigned int destinationFormat,
-                                   unsigned int destinationType,
-                                   AlphaOp alphaOp,
-                                   void* destinationData)
-{
-    switch (destinationType) {
-    case UNSIGNED_BYTE: {
-        uint8_t* destination = static_cast<uint8_t*>(destinationData);
-        if (sourceDataFormat == SourceFormatRGBA8 && destinationFormat == RGBA && sourceUnpackAlignment <= 4 && alphaOp == AlphaDoNothing) {
-            // No conversion necessary.
-            memcpy(destinationData, sourceData, width * height * 4);
-            break;
-        }
-        switch (destinationFormat) {
-        case RGB:
-            switch (alphaOp) {
-            case AlphaDoNothing:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToRGB8, 3);
-                break;
-            case AlphaDoPremultiply:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToRGB8Premultiply, 3);
-                break;
-            case AlphaDoUnmultiply:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToRGB8Unmultiply, 3);
-                break;
-            }
-            break;
-        case RGBA:
-            switch (alphaOp) {
-            case AlphaDoNothing:
-                ASSERT(sourceDataFormat != SourceFormatRGBA8 || sourceUnpackAlignment > 4); // Handled above with fast case.
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, 0, 4);
-                break;
-            case AlphaDoPremultiply:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToRGBA8Premultiply, 4);
-                break;
-            case AlphaDoUnmultiply:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToRGBA8Unmultiply, 4);
-                break;
-            default:
-                ASSERT_NOT_REACHED();
-            }
-            break;
-        case ALPHA:
-            // From the desktop OpenGL conversion rules (OpenGL 2.1
-            // specification, Table 3.15), the alpha channel is chosen
-            // from the RGBA data.
-            doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToA8, 1);
-            break;
-        case LUMINANCE:
-            // From the desktop OpenGL conversion rules (OpenGL 2.1
-            // specification, Table 3.15), the red channel is chosen
-            // from the RGBA data.
-            switch (alphaOp) {
-            case AlphaDoNothing:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToR8, 1);
-                break;
-            case AlphaDoPremultiply:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToR8Premultiply, 1);
-                break;
-            case AlphaDoUnmultiply:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToR8Unmultiply, 1);
-                break;
-            }
-            break;
-        case LUMINANCE_ALPHA:
-            // From the desktop OpenGL conversion rules (OpenGL 2.1
-            // specification, Table 3.15), the red and alpha channels
-            // are chosen from the RGBA data.
-            switch (alphaOp) {
-            case AlphaDoNothing:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToRA8, 2);
-                break;
-            case AlphaDoPremultiply:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToRA8Premultiply, 2);
-                break;
-            case AlphaDoUnmultiply:
-                doPacking<uint8_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToRA8Unmultiply, 2);
-                break;
-            }
-            break;
-        }
-        break;
-    }
-    case UNSIGNED_SHORT_4_4_4_4: {
-        uint16_t* destination = static_cast<uint16_t*>(destinationData);
-        switch (alphaOp) {
-        case AlphaDoNothing:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort4444, 1);
-            break;
-        case AlphaDoPremultiply:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort4444Premultiply, 1);
-            break;
-        case AlphaDoUnmultiply:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort4444Unmultiply, 1);
-            break;
-        }
-        break;
-    }
-    case UNSIGNED_SHORT_5_5_5_1: {
-        uint16_t* destination = static_cast<uint16_t*>(destinationData);
-        switch (alphaOp) {
-        case AlphaDoNothing:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort5551, 1);
-            break;
-        case AlphaDoPremultiply:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort5551Premultiply, 1);
-            break;
-        case AlphaDoUnmultiply:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort5551Unmultiply, 1);
-            break;
-        }
-        break;
-    }
-    case UNSIGNED_SHORT_5_6_5: {
-        uint16_t* destination = static_cast<uint16_t*>(destinationData);
-        switch (alphaOp) {
-        case AlphaDoNothing:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort565, 1);
-            break;
-        case AlphaDoPremultiply:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort565Premultiply, 1);
-            break;
-        case AlphaDoUnmultiply:
-            doPacking<uint16_t>(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA8ToUnsignedShort565Unmultiply, 1);
-            break;
-        }
-        break;
-    }
-    case FLOAT: {
-        // OpenGL ES, and therefore WebGL, require that the format and
-        // internalformat be identical. This means that whenever the
-        // developer supplies an ArrayBufferView on this code path,
-        // the source data will be in a floating-point format.
-        //
-        // The only time the source data will not be floating-point is
-        // when uploading a DOM element or ImageData as a
-        // floating-point texture. Only RGBA8 and BGRA8 are handled in
-        // this case.
-        ASSERT(isFloatingPointSource(sourceDataFormat)
-               || sourceDataFormat == SourceFormatRGBA8
-               || sourceDataFormat == SourceFormatBGRA8);
-        // When uploading a canvas into a floating-point texture,
-        // unmultiplication may be necessary.
-        ASSERT((alphaOp == AlphaDoNothing || alphaOp == AlphaDoPremultiply)
-               || !isFloatingPointSource(sourceDataFormat));
-        // For the source formats with an even number of channels (RGBA32F,
-        // RA32F) it is guaranteed that the pixel data is tightly packed because
-        // unpack alignment <= sizeof(float) * number of channels.
-        float* destination = static_cast<float*>(destinationData);
-        if (alphaOp == AlphaDoNothing
-            && ((sourceDataFormat == SourceFormatRGBA32F && destinationFormat == RGBA)
-                || (sourceDataFormat == SourceFormatRA32F && destinationFormat == LUMINANCE_ALPHA))) {
-            // No conversion necessary.
-            int numChannels = (sourceDataFormat == SourceFormatRGBA32F ? 4 : 2);
-            memcpy(destinationData, sourceData, width * height * numChannels * sizeof(float));
-            break;
-        }
-        switch (destinationFormat) {
-        case RGB:
-            switch (alphaOp) {
-            case AlphaDoNothing:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRGB32F, 3);
-                break;
-            case AlphaDoPremultiply:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRGB32FPremultiply, 3);
-                break;
-            case AlphaDoUnmultiply:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRGB32FUnmultiply, 3);
-                break;
-            }
-            break;
-        case RGBA:
-            // AlphaDoNothing for RGBA32F -> RGBA is handled above with fast path.
-            ASSERT(alphaOp != AlphaDoNothing || sourceDataFormat != SourceFormatRGBA32F);
-            switch (alphaOp) {
-            case AlphaDoNothing:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRGBA32F, 4);
-                break;
-            case AlphaDoPremultiply:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRGBA32FPremultiply, 4);
-                break;
-            case AlphaDoUnmultiply:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRGBA32FUnmultiply, 4);
-                break;
-            }
-            break;
-        case ALPHA:
-            // From the desktop OpenGL conversion rules (OpenGL 2.1
-            // specification, Table 3.15), the alpha channel is chosen
-            // from the RGBA data.
-            doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToA32F, 1);
-            break;
-        case LUMINANCE:
-            // From the desktop OpenGL conversion rules (OpenGL 2.1
-            // specification, Table 3.15), the red channel is chosen
-            // from the RGBA data.
-            switch (alphaOp) {
-            case AlphaDoNothing:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToR32F, 1);
-                break;
-            case AlphaDoPremultiply:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToR32FPremultiply, 1);
-                break;
-            case AlphaDoUnmultiply:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToR32FUnmultiply, 1);
-                break;
-            }
-            break;
-        case LUMINANCE_ALPHA:
-            // From the desktop OpenGL conversion rules (OpenGL 2.1
-            // specification, Table 3.15), the red and alpha channels
-            // are chosen from the RGBA data.
-            switch (alphaOp) {
-            case AlphaDoNothing:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRA32F, 2);
-                break;
-            case AlphaDoPremultiply:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRA32FPremultiply, 2);
-                break;
-            case AlphaDoUnmultiply:
-                doFloatingPointPacking(sourceData, sourceDataFormat, width, height, sourceUnpackAlignment, destination, packOneRowOfRGBA32FToRA32FUnmultiply, 2);
-                break;
-            }
-            break;
+    DataFormat dstDataFormat = getDataFormat(destinationFormat, destinationType);
+    int dstStride = width * TexelBytesForFormat(dstDataFormat);
+    if (flipY) {
+        destinationData = static_cast<uint8_t*>(destinationData) + dstStride*(height - 1);
+        dstStride = -dstStride;
+    }
+    if (!HasAlpha(sourceDataFormat) || !HasColor(sourceDataFormat) || !HasColor(dstDataFormat))
+        alphaOp = AlphaDoNothing;
+
+    if (sourceDataFormat == dstDataFormat && alphaOp == AlphaDoNothing) {
+        const uint8_t* ptr = sourceData;
+        const uint8_t* ptrEnd = sourceData + srcStride * height;
+        unsigned rowSize = (dstStride > 0) ? dstStride: -dstStride;
+        uint8_t* dst = static_cast<uint8_t*>(destinationData);
+        while (ptr < ptrEnd) {
+            memcpy(dst, ptr, rowSize);
+            ptr += srcStride;
+            dst += dstStride;
         }
-        break;
-    }
+        return true;
     }
+
+    FormatConverter converter(width, height, sourceData, destinationData, srcStride, dstStride);
+    converter.convert(sourceDataFormat, dstDataFormat, alphaOp);
+    if (!converter.Success())
+        return false;
     return true;
 }
 
index 911f81c..336997f 100644 (file)
@@ -602,44 +602,65 @@ public:
     // reduce the amount of temporary allocations during texture
     // uploading. This enum must be public because it is accessed
     // by non-member functions.
-    enum SourceDataFormat {
-        SourceFormatRGBA8 = 0,
-        SourceFormatRGBA16Little,
-        SourceFormatRGBA16Big,
-        SourceFormatRGBA32F,
-        SourceFormatRGB8,
-        SourceFormatRGB16Little,
-        SourceFormatRGB16Big,
-        SourceFormatRGB32F,
-        SourceFormatBGR8,
-        SourceFormatBGRA8,
-        SourceFormatBGRA16Little,
-        SourceFormatBGRA16Big,
-        SourceFormatARGB8,
-        SourceFormatARGB16Little,
-        SourceFormatARGB16Big,
-        SourceFormatABGR8,
-        SourceFormatRGBA5551,
-        SourceFormatRGBA4444,
-        SourceFormatRGB565,
-        SourceFormatR8,
-        SourceFormatR16Little,
-        SourceFormatR16Big,
-        SourceFormatR32F,
-        SourceFormatRA8,
-        SourceFormatRA16Little,
-        SourceFormatRA16Big,
-        SourceFormatRA32F,
-        SourceFormatAR8,
-        SourceFormatAR16Little,
-        SourceFormatAR16Big,
-        SourceFormatA8,
-        SourceFormatA16Little,
-        SourceFormatA16Big,
-        SourceFormatA32F,
-        SourceFormatNumFormats
+    enum DataFormat {
+        DataFormatRGBA8 = 0,
+        DataFormatRGBA16Little,
+        DataFormatRGBA16Big,
+        DataFormatRGBA32F,
+        DataFormatRGB8,
+        DataFormatRGB16Little,
+        DataFormatRGB16Big,
+        DataFormatRGB32F,
+        DataFormatBGR8,
+        DataFormatBGRA8,
+        DataFormatBGRA16Little,
+        DataFormatBGRA16Big,
+        DataFormatARGB8,
+        DataFormatARGB16Little,
+        DataFormatARGB16Big,
+        DataFormatABGR8,
+        DataFormatRGBA5551,
+        DataFormatRGBA4444,
+        DataFormatRGB565,
+        DataFormatR8,
+        DataFormatR16Little,
+        DataFormatR16Big,
+        DataFormatR32F,
+        DataFormatRA8,
+        DataFormatRA16Little,
+        DataFormatRA16Big,
+        DataFormatRA32F,
+        DataFormatAR8,
+        DataFormatAR16Little,
+        DataFormatAR16Big,
+        DataFormatA8,
+        DataFormatA16Little,
+        DataFormatA16Big,
+        DataFormatA32F,
+        DataFormatNumFormats
     };
 
+    // Check if the format is one of the formats from the ImageData or DOM elements.
+    // The formats from ImageData is always RGBA8.
+    // The formats from DOM elements vary with Graphics ports. It can only be RGBA8 or BGRA8 for non-CG port while much more for CG port.
+    static ALWAYS_INLINE bool srcFormatComeFromDOMElementOrImageData(DataFormat SrcFormat)
+    {
+#if USE(CG)
+#if CPU(BIG_ENDIAN)
+    return SrcFormat == DataFormatRGBA8 || SrcFormat == DataFormatRGBA16Big
+        || SrcFormat == DataFormatARGB8 || SrcFormat == DataFormatARGB16Big
+        || SrcFormat == DataFormatRGB8 || SrcFormat == DataFormatRGB16Big;
+#else
+    return SrcFormat == DataFormatBGRA8 || SrcFormat == DataFormatARGB16Little
+        || SrcFormat == DataFormatABGR8 || SrcFormat == DataFormatRGBA16Little
+        || SrcFormat == DataFormatBGR8 || SrcFormat == DataFormatRGB16Little
+        || SrcFormat == DataFormatRGBA8 || SrcFormat == DataFormatRGB8;
+#endif
+#else
+    return SrcFormat == DataFormatBGRA8 || SrcFormat == DataFormatRGBA8;
+#endif
+    }
+
     //----------------------------------------------------------------------
     // Entry points for WebGL.
     //
@@ -885,7 +906,7 @@ public:
     // Packs the contents of the given Image which is passed in |pixels| into the passed Vector
     // according to the given format and type, and obeying the flipY and AlphaOp flags.
     // Returns true upon success.
-    static bool packImageData(Image*, const void* pixels, GC3Denum format, GC3Denum type, bool flipY, AlphaOp, SourceDataFormat sourceFormat, unsigned width, unsigned height, unsigned sourceUnpackAlignment, Vector<uint8_t>& data);
+    static bool packImageData(Image*, const void* pixels, GC3Denum format, GC3Denum type, bool flipY, AlphaOp, DataFormat sourceFormat, unsigned width, unsigned height, unsigned sourceUnpackAlignment, Vector<uint8_t>& data);
 
     class ImageExtractor {
     public:
@@ -899,7 +920,7 @@ public:
         const void* imagePixelData() { return m_imagePixelData; }
         unsigned imageWidth() { return m_imageWidth; }
         unsigned imageHeight() { return m_imageHeight; }
-        SourceDataFormat imageSourceFormat() { return m_imageSourceFormat; }
+        DataFormat imageSourceFormat() { return m_imageSourceFormat; }
         AlphaOp imageAlphaOp() { return m_alphaOp; }
         unsigned imageSourceUnpackAlignment() { return m_imageSourceUnpackAlignment; }
         ImageHtmlDomSource imageHtmlDomSource() { return m_imageHtmlDomSource; }
@@ -928,7 +949,7 @@ public:
         const void* m_imagePixelData;
         unsigned m_imageWidth;
         unsigned m_imageHeight;
-        SourceDataFormat m_imageSourceFormat;
+        DataFormat m_imageSourceFormat;
         AlphaOp m_alphaOp;
         unsigned m_imageSourceUnpackAlignment;
     };
@@ -936,20 +957,12 @@ public:
 private:
     GraphicsContext3D(Attributes, HostWindow*, RenderStyle = RenderOffscreen);
 
-    // Helper for getImageData which implements packing of pixel
+    // Helper for packImageData/extractImageData/extractTextureData which implement packing of pixel
     // data into the specified OpenGL destination format and type.
     // A sourceUnpackAlignment of zero indicates that the source
     // data is tightly packed. Non-zero values may take a slow path.
     // Destination data will have no gaps between rows.
-    static bool packPixels(const uint8_t* sourceData,
-                    SourceDataFormat sourceDataFormat,
-                    unsigned int width,
-                    unsigned int height,
-                    unsigned int sourceUnpackAlignment,
-                    unsigned int destinationFormat,
-                    unsigned int destinationType,
-                    AlphaOp alphaOp,
-                    void* destinationData);
+    static bool packPixels(const uint8_t* sourceData, DataFormat sourceDataFormat, unsigned width, unsigned height, unsigned sourceUnpackAlignment, unsigned destinationFormat, unsigned destinationType, AlphaOp, void* destinationData, bool flipY);
 
 #if PLATFORM(MAC) || PLATFORM(GTK) || PLATFORM(QT) || PLATFORM(EFL) || PLATFORM(BLACKBERRY)
     // Take into account the user's requested context creation attributes,
index 3d5a1e2..6cf7b87 100644 (file)
@@ -219,7 +219,7 @@ bool GraphicsContext3D::ImageExtractor::extractImage(bool premultiplyAlpha, bool
     }
 
     m_imagePixelData = cairo_image_surface_get_data(m_imageSurface.get());
-    m_imageSourceFormat = SourceFormatBGRA8;
+    m_imageSourceFormat = DataFormatBGRA8;
     m_imageSourceUnpackAlignment = srcUnpackAlignment;
     return true;
 }
index 942cddb..82fc769 100644 (file)
@@ -62,7 +62,7 @@ enum AlphaFormat {
 };
 
 // This returns SourceFormatNumFormats if the combination of input parameters is unsupported.
-static GraphicsContext3D::SourceDataFormat getSourceDataFormat(unsigned int componentsPerPixel, AlphaFormat alphaFormat, bool is16BitFormat, bool bigEndian)
+static GraphicsContext3D::DataFormat getSourceDataFormat(unsigned componentsPerPixel, AlphaFormat alphaFormat, bool is16BitFormat, bool bigEndian)
 {
     const static SourceDataFormatBase formatTableBase[4][AlphaFormatNumFormats] = { // componentsPerPixel x AlphaFormat
         // AlphaFormatNone            AlphaFormatFirst            AlphaFormatLast
@@ -71,21 +71,21 @@ static GraphicsContext3D::SourceDataFormat getSourceDataFormat(unsigned int comp
         { SourceFormatBaseRGB,        SourceFormatBaseNumFormats, SourceFormatBaseNumFormats }, // 3 componentsPerPixel
         { SourceFormatBaseNumFormats, SourceFormatBaseARGB,       SourceFormatBaseRGBA        } // 4 componentsPerPixel
     };
-    const static GraphicsContext3D::SourceDataFormat formatTable[SourceFormatBaseNumFormats][4] = { // SourceDataFormatBase x bitsPerComponent x endian
+    const static GraphicsContext3D::DataFormat formatTable[SourceFormatBaseNumFormats][4] = { // SourceDataFormatBase x bitsPerComponent x endian
         // 8bits, little endian                 8bits, big endian                     16bits, little endian                        16bits, big endian
-        { GraphicsContext3D::SourceFormatR8,    GraphicsContext3D::SourceFormatR8,    GraphicsContext3D::SourceFormatR16Little,    GraphicsContext3D::SourceFormatR16Big },
-        { GraphicsContext3D::SourceFormatA8,    GraphicsContext3D::SourceFormatA8,    GraphicsContext3D::SourceFormatA16Little,    GraphicsContext3D::SourceFormatA16Big },
-        { GraphicsContext3D::SourceFormatAR8,   GraphicsContext3D::SourceFormatRA8,   GraphicsContext3D::SourceFormatRA16Little,   GraphicsContext3D::SourceFormatRA16Big },
-        { GraphicsContext3D::SourceFormatRA8,   GraphicsContext3D::SourceFormatAR8,   GraphicsContext3D::SourceFormatAR16Little,   GraphicsContext3D::SourceFormatAR16Big },
-        { GraphicsContext3D::SourceFormatBGR8,  GraphicsContext3D::SourceFormatRGB8,  GraphicsContext3D::SourceFormatRGB16Little,  GraphicsContext3D::SourceFormatRGB16Big },
-        { GraphicsContext3D::SourceFormatABGR8, GraphicsContext3D::SourceFormatRGBA8, GraphicsContext3D::SourceFormatRGBA16Little, GraphicsContext3D::SourceFormatRGBA16Big },
-        { GraphicsContext3D::SourceFormatBGRA8, GraphicsContext3D::SourceFormatARGB8, GraphicsContext3D::SourceFormatARGB16Little, GraphicsContext3D::SourceFormatARGB16Big }
+        { GraphicsContext3D::DataFormatR8,    GraphicsContext3D::DataFormatR8,    GraphicsContext3D::DataFormatR16Little,    GraphicsContext3D::DataFormatR16Big },
+        { GraphicsContext3D::DataFormatA8,    GraphicsContext3D::DataFormatA8,    GraphicsContext3D::DataFormatA16Little,    GraphicsContext3D::DataFormatA16Big },
+        { GraphicsContext3D::DataFormatAR8,   GraphicsContext3D::DataFormatRA8,   GraphicsContext3D::DataFormatRA16Little,   GraphicsContext3D::DataFormatRA16Big },
+        { GraphicsContext3D::DataFormatRA8,   GraphicsContext3D::DataFormatAR8,   GraphicsContext3D::DataFormatAR16Little,   GraphicsContext3D::DataFormatAR16Big },
+        { GraphicsContext3D::DataFormatBGR8,  GraphicsContext3D::DataFormatRGB8,  GraphicsContext3D::DataFormatRGB16Little,  GraphicsContext3D::DataFormatRGB16Big },
+        { GraphicsContext3D::DataFormatABGR8, GraphicsContext3D::DataFormatRGBA8, GraphicsContext3D::DataFormatRGBA16Little, GraphicsContext3D::DataFormatRGBA16Big },
+        { GraphicsContext3D::DataFormatBGRA8, GraphicsContext3D::DataFormatARGB8, GraphicsContext3D::DataFormatARGB16Little, GraphicsContext3D::DataFormatARGB16Big }
     };
 
     ASSERT(componentsPerPixel <= 4 && componentsPerPixel > 0);
     SourceDataFormatBase formatBase = formatTableBase[componentsPerPixel - 1][alphaFormat];
     if (formatBase == SourceFormatBaseNumFormats)
-        return GraphicsContext3D::SourceFormatNumFormats;
+        return GraphicsContext3D::DataFormatNumFormats;
     return formatTable[formatBase][(is16BitFormat ? 2 : 0) + (bigEndian ? 1 : 0)];
 }
 
@@ -230,7 +230,7 @@ bool GraphicsContext3D::ImageExtractor::extractImage(bool premultiplyAlpha, bool
     }
 
     m_imageSourceFormat = getSourceDataFormat(componentsPerPixel, alphaFormat, bitsPerComponent == 16, bigEndianSource);
-    if (m_imageSourceFormat == SourceFormatNumFormats)
+    if (m_imageSourceFormat == DataFormatNumFormats)
         return false;
 
     m_pixelData.adoptCF(CGDataProviderCopyData(CGImageGetDataProvider(m_cgImage)));
index a3d7b16..2ffbb78 100644 (file)
@@ -292,7 +292,7 @@ bool GraphicsContext3D::ImageExtractor::extractImage(bool premultiplyAlpha, bool
     }
 
     m_imagePixelData = cairo_image_surface_get_data(m_imageSurface.get());
-    m_imageSourceFormat = SourceFormatBGRA8;
+    m_imageSourceFormat = DataFormatBGRA8;
     m_imageSourceUnpackAlignment = srcUnpackAlignment;
 
     return true;
index 20b3b7d..584640a 100644 (file)
@@ -538,7 +538,7 @@ bool GraphicsContext3D::ImageExtractor::extractImage(bool premultiplyAlpha, bool
     if (!m_imageWidth || !m_imageHeight)
         return false;
     m_imagePixelData = m_qtImage.constBits();
-    m_imageSourceFormat = SourceFormatBGRA8;
+    m_imageSourceFormat = DataFormatBGRA8;
     m_imageSourceUnpackAlignment = 0;
 
     return true;
index 5bd9a3b..3b1c8f2 100644 (file)
@@ -89,7 +89,7 @@ bool GraphicsContext3D::ImageExtractor::extractImage(bool premultiplyAlpha, bool
     if (!m_skiaImage)
         return false;
 
-    m_imageSourceFormat = SK_B32_SHIFT ? SourceFormatRGBA8 : SourceFormatBGRA8;
+    m_imageSourceFormat = SK_B32_SHIFT ? DataFormatRGBA8 : DataFormatBGRA8;
     m_imageWidth = m_skiaImage->bitmap().width();
     m_imageHeight = m_skiaImage->bitmap().height();
     if (!m_imageWidth || !m_imageHeight)