[WHLSL] Property resolver needs to recurse on newValueExpression for RMW operations
author: sbarati@apple.com <sbarati@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 20 Jun 2019 20:10:06 +0000 (20:10 +0000)
committer: sbarati@apple.com <sbarati@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 20 Jun 2019 20:10:06 +0000 (20:10 +0000)
https://bugs.webkit.org/show_bug.cgi?id=199037

Reviewed by Myles C. Maxfield.

Source/WebCore:

When we had an expression like `<e1> += <e2>`, we weren't running the property
resolver on <e2>. If <e2> was something like `mat[1][2]`, we wouldn't end up
simplifying that into the needed getter calls. This patch fixes this by having
the property resolver recurse on <e2>.

This patch also fixes a bug in the property resolver where we weren't marking some
dereference expressions as LValues. This was causing bugs in the Metal code generator.

This patch also adds a way to dump the AST before passes that are guaranteed
not to fail, so those passes are included when dumping the AST between each pass.

Test: webgpu/whlsl-read-modify-write-high-zombies.html

* Modules/webgpu/WHLSL/WHLSLPrepare.cpp:
(WebCore::WHLSL::prepareShared):
* Modules/webgpu/WHLSL/WHLSLPropertyResolver.cpp:
(WebCore::WHLSL::PropertyResolver::visit):
* Modules/webgpu/WHLSL/WHLSLStandardLibrary.txt:

LayoutTests:

* webgpu/whlsl-read-modify-write-high-zombies-expected.txt: Added.
* webgpu/whlsl-read-modify-write-high-zombies.html: Added.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@246649 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/webgpu/whlsl-read-modify-write-high-zombies-expected.txt [new file with mode: 0644]
LayoutTests/webgpu/whlsl-read-modify-write-high-zombies.html [new file with mode: 0644]
Source/WebCore/ChangeLog
Source/WebCore/Modules/webgpu/WHLSL/WHLSLPrepare.cpp
Source/WebCore/Modules/webgpu/WHLSL/WHLSLPropertyResolver.cpp
Source/WebCore/Modules/webgpu/WHLSL/WHLSLStandardLibrary.txt

index ffe25e7..47c6893 100644 (file)
@@ -1,3 +1,13 @@
+2019-06-20  Saam Barati  <sbarati@apple.com>
+
+        [WHLSL] Property resolver needs to recurse on newValueExpression for RMW operations
+        https://bugs.webkit.org/show_bug.cgi?id=199037
+
+        Reviewed by Myles C. Maxfield.
+
+        * webgpu/whlsl-read-modify-write-high-zombies-expected.txt: Added.
+        * webgpu/whlsl-read-modify-write-high-zombies.html: Added.
+
 2019-06-20  Justin Fan  <justin_fan@apple.com>
 
         [WebGPU] High Sierra/Intel HD Graphics 4000: whlsl-harness-test.html crashes during dispatch call.
diff --git a/LayoutTests/webgpu/whlsl-read-modify-write-high-zombies-expected.txt b/LayoutTests/webgpu/whlsl-read-modify-write-high-zombies-expected.txt
new file mode 100644 (file)
index 0000000..c3ed945
--- /dev/null
@@ -0,0 +1,5 @@
+PASS successfullyParsed is true
+
+TEST COMPLETE
+PASS resultsFloat32Array[0] is 66
+
diff --git a/LayoutTests/webgpu/whlsl-read-modify-write-high-zombies.html b/LayoutTests/webgpu/whlsl-read-modify-write-high-zombies.html
new file mode 100644 (file)
index 0000000..d3b7238
--- /dev/null
@@ -0,0 +1,97 @@
+<!DOCTYPE html>
+<html>
+<head>
+<script src="../resources/js-test-pre.js"></script>
+</head>
+<body>
+<script>
+const shaderSource = `
+bool fill(thread float4x4* mat, float value)
+{
+    float4x4 result;
+    for (uint i = 0; i < 4; i = i + 1) {
+            result[i] = float4(value, value, value, value);
+    }
+    *mat = result;
+    return true;
+}
+
+[numthreads(1, 1, 1)]
+compute void computeShader(device float[] buffer : register(u0), float3 threadID : SV_DispatchThreadID) {
+    float4x4 mat;
+    fill(&mat, 4);
+
+    float4 vec;
+    vec[0] += mat[0][0] + mat[0][1] + mat[3][2];
+
+    float value = 42;
+    value += vec[0];
+
+    mat[0][0] += (mat[0][0] += (value += mat[0][0]));
+
+    buffer[uint(threadID.x)] = mat[0][0];
+}
+`;
+let resultsFloat32Array;
+async function start() {
+    const adapter = await navigator.gpu.requestAdapter();
+    const device = await adapter.requestDevice();
+
+    const shaderModule = device.createShaderModule({code: shaderSource, isWHLSL: true});
+    const computeStage = {module: shaderModule, entryPoint: "computeShader"};
+
+    const bindGroupLayoutDescriptor = {bindings: [{binding: 0, visibility: 7, type: "storage-buffer"}]};
+    const bindGroupLayout = device.createBindGroupLayout(bindGroupLayoutDescriptor);
+    const pipelineLayoutDescriptor = {bindGroupLayouts: [bindGroupLayout]};
+    const pipelineLayout = device.createPipelineLayout(pipelineLayoutDescriptor);
+
+    const computePipelineDescriptor = {computeStage, layout: pipelineLayout};
+    const computePipeline = device.createComputePipeline(computePipelineDescriptor);
+
+    const size = Float32Array.BYTES_PER_ELEMENT * 1;
+
+    const bufferDescriptor = {size, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.TRANSFER_SRC};
+    const buffer = device.createBuffer(bufferDescriptor);
+    const bufferArrayBuffer = await buffer.mapWriteAsync();
+    const bufferFloat32Array = new Float32Array(bufferArrayBuffer);
+    bufferFloat32Array[0] = 0;
+    buffer.unmap();
+
+    const resultsBufferDescriptor = {size, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.TRANSFER_DST | GPUBufferUsage.MAP_READ};
+    const resultsBuffer = device.createBuffer(resultsBufferDescriptor);
+
+    const bufferBinding = {buffer: resultsBuffer, size};
+    const bindGroupBinding = {binding: 0, resource: bufferBinding};
+    const bindGroupDescriptor = {layout: bindGroupLayout, bindings: [bindGroupBinding]};
+    const bindGroup = device.createBindGroup(bindGroupDescriptor);
+
+    const commandEncoder = device.createCommandEncoder(); // {}
+    commandEncoder.copyBufferToBuffer(buffer, 0, resultsBuffer, 0, size);
+    const computePassEncoder = commandEncoder.beginComputePass();
+    computePassEncoder.setPipeline(computePipeline);
+    computePassEncoder.setBindGroup(0, bindGroup);
+    computePassEncoder.dispatch(1, 1, 1);
+    computePassEncoder.endPass();
+    const commandBuffer = commandEncoder.finish();
+    device.getQueue().submit([commandBuffer]);
+
+    const resultsArrayBuffer = await resultsBuffer.mapReadAsync();
+    resultsFloat32Array = new Float32Array(resultsArrayBuffer);
+    shouldBe("resultsFloat32Array[0]", "66");
+    resultsBuffer.unmap();
+}
+if (window.testRunner)
+    testRunner.waitUntilDone();
+window.addEventListener("load", function() {
+    start().then(function() {
+        if (window.testRunner)
+            testRunner.notifyDone();
+    }, function() {
+        if (window.testRunner)
+            testRunner.notifyDone();
+    });
+});
+</script>
+<script src="../resources/js-test-post.js"></script>
+</body>
+</html>
index df0980a..ec3afe4 100644 (file)
@@ -1,3 +1,29 @@
+2019-06-20  Saam Barati  <sbarati@apple.com>
+
+        [WHLSL] Property resolver needs to recurse on newValueExpression for RMW operations
+        https://bugs.webkit.org/show_bug.cgi?id=199037
+
+        Reviewed by Myles C. Maxfield.
+
+        When we had an expression like `<e1> += <e2>`, we weren't running the property
+        resolver on <e2>. If <e2> was something like `mat[1][2]`, we wouldn't end up
+        simplifying that into the needed getter calls. This patch fixes this by having
+        the property resolver recurse on <e2>.
+        
+        This patch also fixes a bug in the property resolver where we weren't marking some
+        dereference expressions as LValues. This was causing bugs in the metal code generator.
+        
+        This patch also adds a way to dump the AST between passes that are
+        guaranteed to not fail.
+
+        Test: webgpu/whlsl-read-modify-write-high-zombies.html
+
+        * Modules/webgpu/WHLSL/WHLSLPrepare.cpp:
+        (WebCore::WHLSL::prepareShared):
+        * Modules/webgpu/WHLSL/WHLSLPropertyResolver.cpp:
+        (WebCore::WHLSL::PropertyResolver::visit):
+        * Modules/webgpu/WHLSL/WHLSLStandardLibrary.txt:
+
 2019-06-20  John Wilander  <wilander@apple.com>
 
         Storage Access API: Cap the number of times an iframe document can request access
index aca90aa..ed9930b 100644 (file)
@@ -90,7 +90,7 @@ static bool dumpASTAtEndIfNeeded(Program& program)
     return dumpASTIfNeeded(dumpASTAtEnd, program, "AST at end");
 }
 
-#define RUN_PASS(pass, ...) \
+#define CHECK_PASS(pass, ...) \
     do { \
         dumpASTBetweenEachPassIfNeeded(program, "AST before " # pass); \
         if (!pass(__VA_ARGS__)) { \
@@ -99,7 +99,12 @@ static bool dumpASTAtEndIfNeeded(Program& program)
             return WTF::nullopt; \
         } \
     } while (0)
-    
+
+#define RUN_PASS(pass, ...) \
+    do { \
+        dumpASTBetweenEachPassIfNeeded(program, "AST before " # pass); \
+        pass(__VA_ARGS__); \
+    } while (0)
 
 static Optional<Program> prepareShared(String& whlslSource)
 {
@@ -121,27 +126,27 @@ static Optional<Program> prepareShared(String& whlslSource)
         dumpASTAfterParsingIfNeeded(program);
 
     NameResolver nameResolver(program.nameContext());
-    RUN_PASS(resolveNamesInTypes, program, nameResolver);
-    RUN_PASS(checkRecursiveTypes, program);
-    RUN_PASS(synthesizeStructureAccessors, program);
-    RUN_PASS(synthesizeEnumerationFunctions, program);
-    RUN_PASS(synthesizeArrayOperatorLength, program);
-    RUN_PASS(resolveTypeNamesInFunctions, program, nameResolver);
-    RUN_PASS(synthesizeConstructors, program);
-    RUN_PASS(resolveCallsInFunctions, program, nameResolver);
-    RUN_PASS(checkDuplicateFunctions, program);
-
-    RUN_PASS(check, program);
-
-    checkLiteralTypes(program);
-    RUN_PASS(checkTextureReferences, program);
-    RUN_PASS(autoInitializeVariables, program);
-    resolveProperties(program);
-    findHighZombies(program);
-    RUN_PASS(checkStatementBehavior, program);
-    RUN_PASS(checkRecursion, program);
-    RUN_PASS(checkFunctionStages, program);
-    preserveVariableLifetimes(program);
+    CHECK_PASS(resolveNamesInTypes, program, nameResolver);
+    CHECK_PASS(checkRecursiveTypes, program);
+    CHECK_PASS(synthesizeStructureAccessors, program);
+    CHECK_PASS(synthesizeEnumerationFunctions, program);
+    CHECK_PASS(synthesizeArrayOperatorLength, program);
+    CHECK_PASS(resolveTypeNamesInFunctions, program, nameResolver);
+    CHECK_PASS(synthesizeConstructors, program);
+    CHECK_PASS(resolveCallsInFunctions, program, nameResolver);
+    CHECK_PASS(checkDuplicateFunctions, program);
+
+    CHECK_PASS(check, program);
+
+    RUN_PASS(checkLiteralTypes, program);
+    CHECK_PASS(checkTextureReferences, program);
+    CHECK_PASS(autoInitializeVariables, program);
+    RUN_PASS(resolveProperties, program);
+    RUN_PASS(findHighZombies, program);
+    CHECK_PASS(checkStatementBehavior, program);
+    CHECK_PASS(checkRecursion, program);
+    CHECK_PASS(checkFunctionStages, program);
+    RUN_PASS(preserveVariableLifetimes, program);
 
     dumpASTAtEndIfNeeded(program);
 
index cd258b7..2339c1c 100644 (file)
@@ -546,6 +546,10 @@ void PropertyResolver::visit(AST::AssignmentExpression& assignmentExpression)
 
 void PropertyResolver::visit(AST::ReadModifyWriteExpression& readModifyWriteExpression)
 {
+    checkErrorAndVisit(readModifyWriteExpression.newValueExpression());
+    if (error())
+        return;
+
     if (readModifyWriteExpression.leftValue().typeAnnotation().leftAddressSpace()) {
         // Consider a++;
         // This would get transformed into:
@@ -588,7 +592,7 @@ void PropertyResolver::visit(AST::ReadModifyWriteExpression& readModifyWriteExpr
 
             auto dereferenceExpression = makeUniqueRef<AST::DereferenceExpression>(Lexer::Token(readModifyWriteExpression.origin()), WTFMove(variableReference1));
             dereferenceExpression->setType(baseType->clone());
-            dereferenceExpression->setTypeAnnotation(AST::RightValue());
+            dereferenceExpression->setTypeAnnotation(AST::LeftValue { AST::AddressSpace::Thread }); // FIXME: https://bugs.webkit.org/show_bug.cgi?id=198169 Is this right?
 
             auto variableReference2 = readModifyWriteExpression.oldVariableReference();
             variableReference2->setType(baseType->clone());
@@ -621,7 +625,7 @@ void PropertyResolver::visit(AST::ReadModifyWriteExpression& readModifyWriteExpr
 
             auto dereferenceExpression = makeUniqueRef<AST::DereferenceExpression>(Lexer::Token(readModifyWriteExpression.origin()), WTFMove(variableReference1));
             dereferenceExpression->setType(baseType->clone());
-            dereferenceExpression->setTypeAnnotation(AST::RightValue());
+            dereferenceExpression->setTypeAnnotation(AST::LeftValue { AST::AddressSpace::Thread }); // FIXME: https://bugs.webkit.org/show_bug.cgi?id=198169 Is this right?
 
             auto variableReference2 = readModifyWriteExpression.newVariableReference();
             variableReference2->setType(baseType->clone());
index c4ca0c3..1b77292 100644 (file)
@@ -619,110 +619,110 @@ float4 operator[]=(float4 v, uint index, float a) {
 float4 mul(float4x4 x, float4 y) {
     float4 result;
     result[0] = 0;
-    result[0] = result[0] + x[0][0] * y[0];
-    result[0] = result[0] + x[0][1] * y[1];
-    result[0] = result[0] + x[0][2] * y[2];
-    result[0] = result[0] + x[0][3] * y[3];
+    result[0] += x[0][0] * y[0];
+    result[0] += x[0][1] * y[1];
+    result[0] += x[0][2] * y[2];
+    result[0] += x[0][3] * y[3];
     result[1] = 0;
-    result[1] = result[1] + x[1][0] * y[0];
-    result[1] = result[1] + x[1][1] * y[1];
-    result[1] = result[1] + x[1][2] * y[2];
-    result[1] = result[1] + x[1][3] * y[3];
+    result[1] += x[1][0] * y[0];
+    result[1] += x[1][1] * y[1];
+    result[1] += x[1][2] * y[2];
+    result[1] += x[1][3] * y[3];
     result[2] = 0;
-    result[2] = result[2] + x[2][0] * y[0];
-    result[2] = result[2] + x[2][1] * y[1];
-    result[2] = result[2] + x[2][2] * y[2];
-    result[2] = result[2] + x[2][3] * y[3];
+    result[2] += x[2][0] * y[0];
+    result[2] += x[2][1] * y[1];
+    result[2] += x[2][2] * y[2];
+    result[2] += x[2][3] * y[3];
     result[3] = 0;
-    result[3] = result[3] + x[3][0] * y[0];
-    result[3] = result[3] + x[3][1] * y[1];
-    result[3] = result[3] + x[3][2] * y[2];
-    result[3] = result[3] + x[3][3] * y[3];
+    result[3] += x[3][0] * y[0];
+    result[3] += x[3][1] * y[1];
+    result[3] += x[3][2] * y[2];
+    result[3] += x[3][3] * y[3];
     return result;
 }
 
 float4x4 mul(float4x4 x, float4x4 y) {
     float4x4 result;
     result[0][0] = 0;
-    result[0][0] = result[0][0] + x[0][0] * y[0][0];
-    result[0][0] = result[0][0] + x[0][1] * y[1][0];
-    result[0][0] = result[0][0] + x[0][2] * y[2][0];
-    result[0][0] = result[0][0] + x[0][3] * y[3][0];
+    result[0][0] += x[0][0] * y[0][0];
+    result[0][0] += x[0][1] * y[1][0];
+    result[0][0] += x[0][2] * y[2][0];
+    result[0][0] += x[0][3] * y[3][0];
     result[0][1] = 0;
-    result[0][1] = result[0][1] + x[0][0] * y[0][1];
-    result[0][1] = result[0][1] + x[0][1] * y[1][1];
-    result[0][1] = result[0][1] + x[0][2] * y[2][1];
-    result[0][1] = result[0][1] + x[0][3] * y[3][1];
+    result[0][1] += x[0][0] * y[0][1];
+    result[0][1] += x[0][1] * y[1][1];
+    result[0][1] += x[0][2] * y[2][1];
+    result[0][1] += x[0][3] * y[3][1];
     result[0][2] = 0;
-    result[0][2] = result[0][2] + x[0][0] * y[0][2];
-    result[0][2] = result[0][2] + x[0][1] * y[1][2];
-    result[0][2] = result[0][2] + x[0][2] * y[2][2];
-    result[0][2] = result[0][2] + x[0][3] * y[3][2];
+    result[0][2] += x[0][0] * y[0][2];
+    result[0][2] += x[0][1] * y[1][2];
+    result[0][2] += x[0][2] * y[2][2];
+    result[0][2] += x[0][3] * y[3][2];
     result[0][3] = 0;
-    result[0][3] = result[0][3] + x[0][0] * y[0][3];
-    result[0][3] = result[0][3] + x[0][1] * y[1][3];
-    result[0][3] = result[0][3] + x[0][2] * y[2][3];
-    result[0][3] = result[0][3] + x[0][3] * y[3][3];
+    result[0][3] += x[0][0] * y[0][3];
+    result[0][3] += x[0][1] * y[1][3];
+    result[0][3] += x[0][2] * y[2][3];
+    result[0][3] += x[0][3] * y[3][3];
     result[1][0] = 0;
-    result[1][0] = result[1][0] + x[1][0] * y[0][0];
-    result[1][0] = result[1][0] + x[1][1] * y[1][0];
-    result[1][0] = result[1][0] + x[1][2] * y[2][0];
-    result[1][0] = result[1][0] + x[1][3] * y[3][0];
+    result[1][0] += x[1][0] * y[0][0];
+    result[1][0] += x[1][1] * y[1][0];
+    result[1][0] += x[1][2] * y[2][0];
+    result[1][0] += x[1][3] * y[3][0];
     result[1][1] = 0;
-    result[1][1] = result[1][1] + x[1][0] * y[0][1];
-    result[1][1] = result[1][1] + x[1][1] * y[1][1];
-    result[1][1] = result[1][1] + x[1][2] * y[2][1];
-    result[1][1] = result[1][1] + x[1][3] * y[3][1];
+    result[1][1] += x[1][0] * y[0][1];
+    result[1][1] += x[1][1] * y[1][1];
+    result[1][1] += x[1][2] * y[2][1];
+    result[1][1] += x[1][3] * y[3][1];
     result[1][2] = 0;
-    result[1][2] = result[1][2] + x[1][0] * y[0][2];
-    result[1][2] = result[1][2] + x[1][1] * y[1][2];
-    result[1][2] = result[1][2] + x[1][2] * y[2][2];
-    result[1][2] = result[1][2] + x[1][3] * y[3][2];
+    result[1][2] += x[1][0] * y[0][2];
+    result[1][2] += x[1][1] * y[1][2];
+    result[1][2] += x[1][2] * y[2][2];
+    result[1][2] += x[1][3] * y[3][2];
     result[1][3] = 0;
-    result[1][3] = result[1][3] + x[1][0] * y[0][3];
-    result[1][3] = result[1][3] + x[1][1] * y[1][3];
-    result[1][3] = result[1][3] + x[1][2] * y[2][3];
-    result[1][3] = result[1][3] + x[1][3] * y[3][3];
+    result[1][3] += x[1][0] * y[0][3];
+    result[1][3] += x[1][1] * y[1][3];
+    result[1][3] += x[1][2] * y[2][3];
+    result[1][3] += x[1][3] * y[3][3];
     result[2][0] = 0;
-    result[2][0] = result[2][0] + x[2][0] * y[0][0];
-    result[2][0] = result[2][0] + x[2][1] * y[1][0];
-    result[2][0] = result[2][0] + x[2][2] * y[2][0];
-    result[2][0] = result[2][0] + x[2][3] * y[3][0];
+    result[2][0] += x[2][0] * y[0][0];
+    result[2][0] += x[2][1] * y[1][0];
+    result[2][0] += x[2][2] * y[2][0];
+    result[2][0] += x[2][3] * y[3][0];
     result[2][1] = 0;
-    result[2][1] = result[2][1] + x[2][0] * y[0][1];
-    result[2][1] = result[2][1] + x[2][1] * y[1][1];
-    result[2][1] = result[2][1] + x[2][2] * y[2][1];
-    result[2][1] = result[2][1] + x[2][3] * y[3][1];
+    result[2][1] += x[2][0] * y[0][1];
+    result[2][1] += x[2][1] * y[1][1];
+    result[2][1] += x[2][2] * y[2][1];
+    result[2][1] += x[2][3] * y[3][1];
     result[2][2] = 0;
-    result[2][2] = result[2][2] + x[2][0] * y[0][2];
-    result[2][2] = result[2][2] + x[2][1] * y[1][2];
-    result[2][2] = result[2][2] + x[2][2] * y[2][2];
-    result[2][2] = result[2][2] + x[2][3] * y[3][2];
+    result[2][2] += x[2][0] * y[0][2];
+    result[2][2] += x[2][1] * y[1][2];
+    result[2][2] += x[2][2] * y[2][2];
+    result[2][2] += x[2][3] * y[3][2];
     result[2][3] = 0;
-    result[2][3] = result[2][3] + x[2][0] * y[0][3];
-    result[2][3] = result[2][3] + x[2][1] * y[1][3];
-    result[2][3] = result[2][3] + x[2][2] * y[2][3];
-    result[2][3] = result[2][3] + x[2][3] * y[3][3];
+    result[2][3] += x[2][0] * y[0][3];
+    result[2][3] += x[2][1] * y[1][3];
+    result[2][3] += x[2][2] * y[2][3];
+    result[2][3] += x[2][3] * y[3][3];
     result[3][0] = 0;
-    result[3][0] = result[3][0] + x[3][0] * y[0][0];
-    result[3][0] = result[3][0] + x[3][1] * y[1][0];
-    result[3][0] = result[3][0] + x[3][2] * y[2][0];
-    result[3][0] = result[3][0] + x[3][3] * y[3][0];
+    result[3][0] += x[3][0] * y[0][0];
+    result[3][0] += x[3][1] * y[1][0];
+    result[3][0] += x[3][2] * y[2][0];
+    result[3][0] += x[3][3] * y[3][0];
     result[3][1] = 0;
-    result[3][1] = result[3][1] + x[3][0] * y[0][1];
-    result[3][1] = result[3][1] + x[3][1] * y[1][1];
-    result[3][1] = result[3][1] + x[3][2] * y[2][1];
-    result[3][1] = result[3][1] + x[3][3] * y[3][1];
+    result[3][1] += x[3][0] * y[0][1];
+    result[3][1] += x[3][1] * y[1][1];
+    result[3][1] += x[3][2] * y[2][1];
+    result[3][1] += x[3][3] * y[3][1];
     result[3][2] = 0;
-    result[3][2] = result[3][2] + x[3][0] * y[0][2];
-    result[3][2] = result[3][2] + x[3][1] * y[1][2];
-    result[3][2] = result[3][2] + x[3][2] * y[2][2];
-    result[3][2] = result[3][2] + x[3][3] * y[3][2];
+    result[3][2] += x[3][0] * y[0][2];
+    result[3][2] += x[3][1] * y[1][2];
+    result[3][2] += x[3][2] * y[2][2];
+    result[3][2] += x[3][3] * y[3][2];
     result[3][3] = 0;
-    result[3][3] = result[3][3] + x[3][0] * y[0][3];
-    result[3][3] = result[3][3] + x[3][1] * y[1][3];
-    result[3][3] = result[3][3] + x[3][2] * y[2][3];
-    result[3][3] = result[3][3] + x[3][3] * y[3][3];
+    result[3][3] += x[3][0] * y[0][3];
+    result[3][3] += x[3][1] * y[1][3];
+    result[3][3] += x[3][2] * y[2][3];
+    result[3][3] += x[3][3] * y[3][3];
     return result;
 }