From 5e3668f00922dcbbfbe164e59e8bc5d8b296c8c2 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 25 Sep 2024 13:44:28 +0200
Subject: [PATCH] compute:gemm: fix summa gemm when A or B are transposed

---
 compute/pzgemm.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/compute/pzgemm.c b/compute/pzgemm.c
index f0c77ad3f..5c6563d2d 100644
--- a/compute/pzgemm.c
+++ b/compute/pzgemm.c
@@ -233,7 +233,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     options,
                     ChamUpperLower, tempkk, tempmm,
                     A(  k,  m ),
-                    WA( m, (k % C->q) + lq ) );
+                    WA( m, (m % C->q) + lq ) );
 
                 RUNTIME_data_flush( sequence, A( k, m ) );
 
@@ -241,8 +241,8 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     INSERT_TASK_zlacpy(
                         options,
                         ChamUpperLower, tempkk, tempmm,
-                        WA( m, ((k+q-1) % C->q) + lq ),
-                        WA( m, ((k+q)   % C->q) + lq ) );
+                        WA( m, ((m+q-1) % C->q) + lq ),
+                        WA( m, ((m+q)   % C->q) + lq ) );
                 }
             }
         }
@@ -273,7 +273,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     options,
                     ChamUpperLower, tempnn, tempkk,
                     B(   n,              k ),
-                    WB( (k % C->p) + lp, n ) );
+                    WB( (n % C->p) + lp, n ) );
 
                 RUNTIME_data_flush( sequence, B( n, k ) );
 
@@ -281,8 +281,8 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     INSERT_TASK_zlacpy(
                         options,
                         ChamUpperLower, tempnn, tempkk,
-                        WB( ((k+p-1) % C->p) + lp, n ),
-                        WB( ((k+p)   % C->p) + lp, n ) );
+                        WB( ((n+p-1) % C->p) + lp, n ),
+                        WB( ((n+p)   % C->p) + lp, n ) );
                 }
             }
         }
-- 
GitLab