diff --git a/compute/pzhemm.c b/compute/pzhemm.c
index 93efe6fcaf43be3a57c8c8314a7b9c11f6c7448d..b41220165efa574408a907cdd54e6e55dd705652 100644
--- a/compute/pzhemm.c
+++ b/compute/pzhemm.c
@@ -23,47 +23,36 @@
  */
 #include "control/common.h"
 
-#define A(m, n) A,  m,  n
-#define B(m, n) B,  m,  n
-#define C(m, n) C,  m,  n
-#define WA(m, n) &WA,  m,  n
-#define WB(m, n) &WB,  m,  n
+#define A(  _m_, _n_ ) A,  (_m_), (_n_)
+#define B(  _m_, _n_ ) B,  (_m_), (_n_)
+#define C(  _m_, _n_ ) C,  (_m_), (_n_)
+#define WA( _m_, _n_ ) WA, (_m_), (_n_)
+#define WB( _m_, _n_ ) WB, (_m_), (_n_)
 
 /**
  *  Parallel tile hermitian matrix-matrix multiplication.
  *  SUMMA algorithm for 2D block-cyclic data distribution.
  */
 static inline void
-chameleon_pzhemm_summa( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t uplo,
-                        CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
-                        CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
-                        RUNTIME_option_t *options )
+chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
+                             CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
+                             CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
+                             CHAM_desc_t *WA, CHAM_desc_t *WB,
+                             RUNTIME_option_t *options )
 {
     RUNTIME_sequence_t *sequence = options->sequence;
-    cham_trans_t transA, transB;
-    int Am, An, m, n, k, p, q, KT, K, lp, lq;
-    int ldam, ldbk, ldbm, ldcm;
+    cham_trans_t transA;
+    int m, n, k, p, q, KT, K, lp, lq;
+    int ldcm;
     int tempmm, tempnn, tempkk;
     int lookahead, myp, myq;
 
     CHAMELEON_Complex64_t zbeta;
     CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
-    CHAM_desc_t WA, WB;
 
     lookahead = chamctxt->lookahead;
-    chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE,
-                         ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
-                         C->mt * C->mb, C->nb * C->q * lookahead, 0, 0,
-                         C->mt * C->mb, C->nb * C->q * lookahead, C->p, C->q,
-                         NULL, NULL, NULL );
-    chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
-                         ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
-                         C->mb * C->p * lookahead, C->nt * C->nb, 0, 0,
-                         C->mb * C->p * lookahead, C->nt * C->nb, C->p, C->q,
-                         NULL, NULL, NULL );
-
-    KT  = side == ChamLeft ? A->nt : A->mt;
-    K   = side == ChamLeft ? A->n  : A->m;
+    KT  = A->nt;
+    K   = A->n;
     myp = C->myrank / C->q;
     myq = C->myrank % C->q;
 
@@ -72,162 +61,291 @@ chameleon_pzhemm_summa( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
         lq = (k % lookahead) * C->q;
         tempkk = k == KT - 1 ? K - k * A->nb : A->nb;
         zbeta = k == 0 ? beta : zone;
-        ldbk = BLKLDD(B, k);
 
         /* Transfert ownership of the k column of A or B */
         for (m = 0; m < C->mt; m ++ ) {
-            tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
+            int Am, Ak, ldam;
+            int tempam, tempak;
 
-            if ( side == ChamLeft ) {
-                if ( (( uplo == ChamUpper ) && ( m > k )) ||
-                     (( uplo == ChamLower ) && ( m < k )) ) {
-                    Am = k;
-                    An = m;
-                }
-                else {
-                    Am = m;
-                    An = k;
-                }
-                ldam = BLKLDD(A, Am);
-
-                INSERT_TASK_zlacpy(
-                    options,
-                    ChamUpperLower, tempmm, tempkk, C->mb,
-                    A(  Am, An ),             ldam,
-                    WA( m, (k % C->q) + lq ), WA.mb );
-
-                RUNTIME_data_flush( sequence, A( Am, An ) );
+            tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
 
-                for ( q=1; q < C->q; q++ ) {
-                    INSERT_TASK_zlacpy(
-                        options,
-                        ChamUpperLower, tempmm, tempkk, C->mb,
-                        WA( m, ((k+q-1) % C->q) + lq ), WA.mb,
-                        WA( m, ((k+q)   % C->q) + lq ), WA.mb );
-                }
+            if ( (( uplo == ChamUpper ) && ( m > k )) ||
+                 (( uplo == ChamLower ) && ( m < k )) )
+            {
+                    /* Let's take A( k, m ) */
+                Am = k;
+                Ak = m;
+                tempam = tempkk;
+                tempak = tempmm;
             }
             else {
-                ldbm = BLKLDD(B, m);
+                /* Let's take A( m, k ) */
+                Am = m;
+                Ak = k;
+                tempam = tempmm;
+                tempak = tempkk;
+            }
+            ldam = BLKLDD( A, Am );
 
-                INSERT_TASK_zlacpy(
-                    options,
-                    ChamUpperLower, tempmm, tempkk, C->mb,
-                    B(  m,  k ),              ldbm,
-                    WA( m, (k % C->q) + lq ), WA.mb );
+            INSERT_TASK_zlacpy(
+                options,
+                ChamUpperLower, tempam, tempak, C->mb,
+                A( Am, Ak ),              ldam,
+                WA( m, (k % C->q) + lq ), WA->mb );
 
-                RUNTIME_data_flush( sequence, B( m, k ) );
+            RUNTIME_data_flush( sequence, A( Am, Ak ) );
 
-                for ( q=1; q < C->q; q++ ) {
-                    INSERT_TASK_zlacpy(
-                        options,
-                        ChamUpperLower, tempmm, tempkk, C->mb,
-                        WA( m, ((k+q-1) % C->q) + lq ), WA.mb,
-                        WA( m, ((k+q)   % C->q) + lq ), WA.mb );
-                }
+            for ( q=1; q < C->q; q++ ) {
+                INSERT_TASK_zlacpy(
+                    options,
+                    ChamUpperLower, tempam, tempak, C->mb,
+                    WA( m, ((k+q-1) % C->q) + lq ), WA->mb,
+                    WA( m, ((k+q)   % C->q) + lq ), WA->mb );
             }
         }
 
         /* Transfert ownership of the k row of B, or A */
         for (n = 0; n < C->nt; n++) {
+            int ldbk;
+
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+            ldbk = BLKLDD( B, k );
 
-            if ( side == ChamRight ) {
-                if ( (( uplo == ChamUpper ) && ( n < k )) ||
-                     (( uplo == ChamLower ) && ( n > k )) ) {
-                    Am = n;
-                    An = k;
-                }
-                else {
-                    Am = k;
-                    An = n;
-                }
-                ldam = BLKLDD(A, Am);
+            INSERT_TASK_zlacpy(
+                options,
+                ChamUpperLower, tempkk, tempnn, C->mb,
+                B(   k,              n ), ldbk,
+                WB( (k % C->p) + lp, n ), WB->mb );
 
+            RUNTIME_data_flush( sequence, B( k, n ) );
+
+            for ( p=1; p < C->p; p++ ) {
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempkk, tempnn, C->mb,
-                    A(   Am,            An ), ldam,
-                    WB( (k % C->p) + lp, n ), WB.mb );
+                    WB( ((k+p-1) % C->p) + lp, n ), WB->mb,
+                    WB( ((k+p)   % C->p) + lp, n ), WB->mb );
+            }
+        }
+
+        /* Perform the update of this iteration */
+        for (m = myp; m < C->mt; m+=C->p) {
+            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+            ldcm = BLKLDD(C, m);
 
-                RUNTIME_data_flush( sequence, A( Am, An ) );
+            if ( k == m ) {
+                for (n = myq; n < C->nt; n+=C->q) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                for ( p=1; p < C->p; p++ ) {
-                    INSERT_TASK_zlacpy(
-                        options,
-                        ChamUpperLower, tempkk, tempnn, C->mb,
-                        WB( ((k+p-1) % C->p) + lp, n ), WB.mb,
-                        WB( ((k+p)   % C->p) + lp, n ), WB.mb );
+                    INSERT_TASK_zhemm(
+                        options, ChamLeft, uplo,
+                        tempmm, tempnn, A->mb,
+                        alpha, WA( m,        myq + lq ), WA->mb,
+                               WB( myp + lp, n        ), WB->mb,
+                        zbeta, C(  m,        n        ), ldcm );
                 }
             }
             else {
+                if ( (( uplo == ChamUpper ) && ( m > k )) ||
+                     (( uplo == ChamLower ) && ( m < k )) )
+                {
+                    transA = ChamConjTrans;
+                }
+                else {
+                    transA = ChamNoTrans;
+                }
+
+                for (n = myq; n < C->nt; n+=C->q) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                    INSERT_TASK_zgemm(
+                        options, transA, ChamNoTrans,
+                        tempmm, tempnn, tempkk, A->mb,
+                        alpha, WA( m,        myq + lq ), WA->mb,
+                               WB( myp + lp, n        ), WB->mb,
+                        zbeta, C(  m,        n        ), ldcm );
+                }
+            }
+        }
+    }
+}
+
+/**
+ *  Parallel tile hermitian matrix-matrix multiplication.
+ *  SUMMA algorithm for 2D block-cyclic data distribution.
+ */
+static inline void
+chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
+                              CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
+                              CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
+                              CHAM_desc_t *WA, CHAM_desc_t *WB,
+                              RUNTIME_option_t *options )
+{
+    RUNTIME_sequence_t *sequence = options->sequence;
+    cham_trans_t transA;
+    int m, n, k, p, q, KT, K, lp, lq;
+    int ldcm;
+    int tempmm, tempnn, tempkk;
+    int lookahead, myp, myq;
+
+    CHAMELEON_Complex64_t zbeta;
+    CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
+
+    lookahead = chamctxt->lookahead;
+    KT  = A->mt;
+    K   = A->m;
+    myp = C->myrank / C->q;
+    myq = C->myrank % C->q;
+
+    for (k = 0; k < KT; k++ ) {
+        lp = (k % lookahead) * C->p;
+        lq = (k % lookahead) * C->q;
+        tempkk = k == KT - 1 ? K - k * A->nb : A->nb;
+        zbeta = k == 0 ? beta : zone;
+
+        /* Transfert ownership of the k column of A or B */
+        for (m = 0; m < C->mt; m++ ) {
+            int ldbm;
+
+            tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
+            ldbm = BLKLDD( B, m );
+
+            INSERT_TASK_zlacpy(
+                options,
+                ChamUpperLower, tempmm, tempkk, C->mb,
+                B(  m,  k ),              ldbm,
+                WA( m, (k % C->q) + lq ), WA->mb );
+
+            RUNTIME_data_flush( sequence, B( m, k ) );
+
+            for ( q=1; q < C->q; q++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempkk, tempnn, C->mb,
-                    B(   k,              n ), ldbk,
-                    WB( (k % C->p) + lp, n ), WB.mb );
+                    ChamUpperLower, tempmm, tempkk, C->mb,
+                    WA( m, ((k+q-1) % C->q) + lq ), WA->mb,
+                    WA( m, ((k+q)   % C->q) + lq ), WA->mb );
+            }
+        }
 
-                RUNTIME_data_flush( sequence, B( k, n ) );
+        /* Transfert ownership of the k row of B, or A */
+        for (n = 0; n < C->nt; n++) {
+            int Ak, An, ldak;
+            int tempak, tempan;
 
-                for ( p=1; p < C->p; p++ ) {
-                    INSERT_TASK_zlacpy(
-                        options,
-                        ChamUpperLower, tempkk, tempnn, C->mb,
-                        WB( ((k+p-1) % C->p) + lp, n ), WB.mb,
-                        WB( ((k+p)   % C->p) + lp, n ), WB.mb );
-                }
+            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+            if ( (( uplo == ChamUpper ) && ( n < k )) ||
+                 (( uplo == ChamLower ) && ( n > k )) )
+            {
+                Ak = n;
+                An = k;
+                tempak = tempnn;
+                tempan = tempkk;
+            }
+            else
+            {
+                Ak = k;
+                An = n;
+                tempak = tempkk;
+                tempan = tempnn;
+            }
+            ldak = BLKLDD( A, Ak );
+
+            INSERT_TASK_zlacpy(
+                options,
+                ChamUpperLower, tempak, tempan, C->mb,
+                A(  Ak,              An ), ldak,
+                WB( (k % C->p) + lp, n  ), WB->mb );
+
+            RUNTIME_data_flush( sequence, A( Ak, An ) );
+
+            for ( p=1; p < C->p; p++ ) {
+                INSERT_TASK_zlacpy(
+                    options,
+                    ChamUpperLower, tempak, tempan, C->mb,
+                    WB( ((k+p-1) % C->p) + lp, n ), WB->mb,
+                    WB( ((k+p)   % C->p) + lp, n ), WB->mb );
             }
         }
 
-        /*
-         *  ChamLeft / ChamLower
-         */
-        for (m = myp; m < C->mt; m+=C->p) {
-            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-            ldcm = BLKLDD(C, m);
+        /* Perform the update of this iteration */
+        for (n = myq; n < C->nt; n+=C->q) {
+            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-            for (n = myq; n < C->nt; n+=C->q) {
-                tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+            if ( k == n ) {
+                for (m = myp; m < C->mt; m+=C->p) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
 
-                if (side == ChamLeft) {
-                    transB = ChamNoTrans;
-                    if ( (( uplo == ChamUpper ) && ( m > k )) ||
-                         (( uplo == ChamLower ) && ( m < k )) ) {
-                        transA = ChamConjTrans;
-                    }
-                    else {
-                        transA = ChamNoTrans;
-                    }
+                    /* A has been stored in WA or WB for the summa ring */
+                    INSERT_TASK_zhemm(
+                        options, ChamRight, uplo,
+                        tempmm, tempnn, A->mb,
+                        alpha, WB( myp + lp, n        ), WB->mb,
+                               WA( m,        myq + lq ), WA->mb,
+                        zbeta, C(  m,        n        ), ldcm );
+                }
+            }
+            else {
+                if ( (( uplo == ChamUpper ) && ( n < k )) ||
+                     (( uplo == ChamLower ) && ( n > k )) )
+                {
+                    transA = ChamConjTrans;
                 }
                 else {
                     transA = ChamNoTrans;
-                    if ( (( uplo == ChamUpper ) && ( n < k )) ||
-                         (( uplo == ChamLower ) && ( n > k )) ) {
-                        transB = ChamConjTrans;
-                    }
-                    else {
-                        transB = ChamNoTrans;
-                    }
                 }
 
-                if ( k == m ) {
-                    INSERT_TASK_zhemm(
-                        options, side, uplo,
-                        tempmm, tempnn, A->mb,
-                        alpha, WA( m,        myq + lq ), WA.mb,  /* lda * Z */
-                               WB( myp + lp, n        ), WB.mb,  /* ldb * Y */
-                        zbeta, C(  m,        n        ), ldcm ); /* ldc * Y */
-                }
-                else {
+                for (m = myp; m < C->mt; m+=C->p) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
+
                     INSERT_TASK_zgemm(
-                        options, transA, transB,
+                        options, ChamNoTrans, transA,
                         tempmm, tempnn, tempkk, A->mb,
-                        alpha, WA( m,        myq + lq ), WA.mb,  /* lda * Z */
-                               WB( myp + lp, n        ), WB.mb,  /* ldb * Y */
-                        zbeta, C(  m,        n        ), ldcm ); /* ldc * Y */
+                        alpha, WA( m,        myq + lq ), WA->mb,
+                               WB( myp + lp, n        ), WB->mb,
+                        zbeta, C(  m,        n        ), ldcm );
                 }
             }
         }
     }
+}
+
+/**
+ *  Parallel tile hermitian matrix-matrix multiplication.
+ *  SUMMA algorithm for 2D block-cyclic data distribution.
+ */
+static inline void
+chameleon_pzhemm_summa( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t uplo,
+                        CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
+                        CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
+                        RUNTIME_option_t *options )
+{
+    RUNTIME_sequence_t *sequence = options->sequence;
+    CHAM_desc_t WA, WB;
+    int lookahead;
+
+    lookahead = chamctxt->lookahead;
+    chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE,
+                         ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
+                         C->mt * C->mb, C->nb * C->q * lookahead, 0, 0,
+                         C->mt * C->mb, C->nb * C->q * lookahead, C->p, C->q,
+                         NULL, NULL, NULL );
+    chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
+                         ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
+                         C->mb * C->p * lookahead, C->nt * C->nb, 0, 0,
+                         C->mb * C->p * lookahead, C->nt * C->nb, C->p, C->q,
+                         NULL, NULL, NULL );
+
+    if (side == ChamLeft) {
+        chameleon_pzhemm_summa_left( chamctxt, uplo, alpha, A, B, beta, C,
+                                     &WA, &WB, options );
+    }
+    else {
+        chameleon_pzhemm_summa_right( chamctxt, uplo, alpha, A, B, beta, C,
+                                      &WA, &WB, options );
+    }
 
     RUNTIME_desc_flush( &WA, sequence );
     RUNTIME_desc_flush( &WB, sequence );
@@ -453,7 +571,8 @@ chameleon_pzhemm( cham_side_t side, cham_uplo_t uplo,
     {
         chameleon_pzhemm_summa(   chamctxt, side, uplo, alpha, A, B, beta, C, &options );
     }
-    else {
+    else
+    {
         chameleon_pzhemm_generic( chamctxt, side, uplo, alpha, A, B, beta, C, &options );
     }
 
diff --git a/compute/pzher2k.c b/compute/pzher2k.c
index 70cdd6b7b7fa3a2dc1e717c34e48ebaec75c1bb5..8f51860bb996a404e4307e4436a2f06e98d59b53 100644
--- a/compute/pzher2k.c
+++ b/compute/pzher2k.c
@@ -29,15 +29,15 @@
 /**
  *  Parallel tile Hermitian rank-k update - dynamic scheduling
  */
-void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans,
-                          CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
-                          double beta,  CHAM_desc_t *C,
-                          RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
+void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
+                        CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
+                        double beta,  CHAM_desc_t *C,
+                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
 {
     CHAM_context_t *chamctxt;
     RUNTIME_option_t options;
 
-    int m, n, k;
+    int m, n, k, mmin, mmax;
     int ldak, ldam, ldan, ldcm, ldcn;
     int ldbk, ldbm, ldbn;
     int tempnn, tempmm, tempkn, tempkm;
@@ -57,6 +57,16 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans,
         ldan = BLKLDD(A, n);
         ldbn = BLKLDD(B, n);
         ldcn = BLKLDD(C, n);
+
+        if (uplo == ChamLower) {
+            mmin = n+1;
+            mmax = C->mt;
+        }
+        else {
+            mmin = 0;
+            mmax = n;
+        }
+
         /*
          *  ChamNoTrans
          */
@@ -72,68 +82,34 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans,
                            B(n, k), ldbn,
                     dbeta, C(n, n), ldcn); /* ldc  * N */
             }
-            /*
-             *  ChamNoTrans / ChamLower
-             */
-            if (uplo == ChamLower) {
-                for (m = n+1; m < C->mt; m++) {
-                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
-                    ldcm = BLKLDD(C, m);
-                    for (k = 0; k < A->nt; k++) {
-                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                        zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamConjTrans,
-                            tempmm, tempnn, tempkn, A->mb,
-                            conj(alpha), A(m, k), ldam,  /* ldam * K */
-                                   B(n, k), ldbn,  /* ldan * K */
-                            zbeta, C(m, n), ldcm); /* ldc  * N */
+            for (m = mmin; m < mmax; m++) {
+                tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                ldam = BLKLDD(A, m);
+                ldbm = BLKLDD(B, m);
+                ldcm = BLKLDD(C, m);
+                for (k = 0; k < A->nt; k++) {
+                    tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+                    zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
+                    INSERT_TASK_zgemm(
+                        &options,
+                        ChamNoTrans, ChamConjTrans,
+                        tempmm, tempnn, tempkn, A->mb,
+                        alpha, A(m, k), ldam,
+                               B(n, k), ldbn,
+                        zbeta, C(m, n), ldcm);
 
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamConjTrans,
-                            tempmm, tempnn, tempkn, A->mb,
-                            alpha, B(m, k), ldbm,  /* ldam * K */
-                                   A(n, k), ldan,  /* ldan * K */
-                            zone,  C(m, n), ldcm); /* ldc  * N */
-                    }
-                }
-            }
-            /*
-             *  ChamNoTrans / ChamUpper
-             */
-            else {
-                for (m = n+1; m < C->mt; m++) {
-                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
-                    for (k = 0; k < A->nt; k++) {
-                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                        zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamConjTrans,
-                            tempnn, tempmm, tempkn, A->mb,
-                            alpha, A(n, k), ldan,  /* ldan * K */
-                                   B(m, k), ldbm,  /* ldam * M */
-                            zbeta, C(n, m), ldcn); /* ldc  * M */
-
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamConjTrans,
-                            tempnn, tempmm, tempkn, A->mb,
-                            conj(alpha), B(n, k), ldan,  /* ldan * K */
-                                   A(m, k), ldam,  /* ldam * M */
-                            zone,  C(n, m), ldcn); /* ldc  * M */
-                    }
+                    INSERT_TASK_zgemm(
+                        &options,
+                        ChamNoTrans, ChamConjTrans,
+                        tempmm, tempnn, tempkn, A->mb,
+                        conj(alpha), B(m, k), ldbm,
+                                     A(n, k), ldan,
+                        zone,        C(m, n), ldcm);
                 }
             }
         }
         /*
-         *  Cham[Conj]Trans
+         *  ChamConjTrans
          */
         else {
             for (k = 0; k < A->mt; k++) {
@@ -149,63 +125,29 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans,
                            B(k, n), ldbk,
                     dbeta, C(n, n), ldcn); /* ldc * N */
             }
-            /*
-             *  Cham[Conj]Trans / ChamLower
-             */
-            if (uplo == ChamLower) {
-                for (m = n+1; m < C->mt; m++) {
-                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
-                    for (k = 0; k < A->mt; k++) {
-                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
-                        zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamNoTrans,
-                            tempmm, tempnn, tempkm, A->mb,
-                            alpha, A(k, m), ldak,  /* lda * M */
-                                   B(k, n), ldbk,  /* lda * N */
-                            zbeta, C(m, n), ldcm); /* ldc * N */
-
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamNoTrans,
-                            tempmm, tempnn, tempkm, A->mb,
-                            alpha, B(k, m), ldbk,  /* lda * M */
-                                   A(k, n), ldak,  /* lda * N */
-                            zone,  C(m, n), ldcm); /* ldc * N */
-                    }
-                }
-            }
-            /*
-             *  Cham[Conj]Trans / ChamUpper
-             */
-            else {
-                for (m = n+1; m < C->mt; m++) {
-                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    for (k = 0; k < A->mt; k++) {
-                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
-                        zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamNoTrans,
-                            tempnn, tempmm, tempkm, A->mb,
-                            alpha, A(k, n), ldak,  /* lda * K */
-                                   B(k, m), ldbk,  /* lda * M */
-                            zbeta, C(n, m), ldcn); /* ldc * M */
+            for (m = mmin; m < mmax; m++) {
+                tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                ldcm = BLKLDD(C, m);
+                for (k = 0; k < A->mt; k++) {
+                    tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+                    ldak = BLKLDD(A, k);
+                    ldbk = BLKLDD(B, k);
+                    zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
+                    INSERT_TASK_zgemm(
+                        &options,
+                        ChamConjTrans, ChamNoTrans,
+                        tempmm, tempnn, tempkm, A->mb,
+                        alpha, A(k, m), ldak,
+                               B(k, n), ldbk,
+                        zbeta, C(m, n), ldcm);
 
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamNoTrans,
-                            tempnn, tempmm, tempkm, A->mb,
-                            conj(alpha), B(k, n), ldbk,  /* lda * K */
-                                   A(k, m), ldak,  /* lda * M */
-                            zone,  C(n, m), ldcn); /* ldc * M */
-                    }
+                    INSERT_TASK_zgemm(
+                        &options,
+                        ChamConjTrans, ChamNoTrans,
+                        tempmm, tempnn, tempkm, A->mb,
+                        conj(alpha), B(k, m), ldbk,
+                                     A(k, n), ldak,
+                        zone,        C(m, n), ldcm );
                 }
             }
         }
diff --git a/compute/pzsymm.c b/compute/pzsymm.c
index 0413d63b5f94fe5266ec2140232998360b175de8..f9d724c08ab8835e1dddffc332bb4298facc9ee2 100644
--- a/compute/pzsymm.c
+++ b/compute/pzsymm.c
@@ -23,47 +23,36 @@
  */
 #include "control/common.h"
 
-#define A(m, n) A,  m,  n
-#define B(m, n) B,  m,  n
-#define C(m, n) C,  m,  n
-#define WA(m, n) &WA,  m,  n
-#define WB(m, n) &WB,  m,  n
+#define A(  _m_, _n_ ) A,  (_m_), (_n_)
+#define B(  _m_, _n_ ) B,  (_m_), (_n_)
+#define C(  _m_, _n_ ) C,  (_m_), (_n_)
+#define WA( _m_, _n_ ) WA, (_m_), (_n_)
+#define WB( _m_, _n_ ) WB, (_m_), (_n_)
 
 /**
  *  Parallel tile symmetric matrix-matrix multiplication.
  *  SUMMA algorithm for 2D block-cyclic data distribution.
  */
 static inline void
-chameleon_pzsymm_summa( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t uplo,
-                        CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
-                        CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
-                        RUNTIME_option_t *options )
+chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
+                             CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
+                             CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
+                             CHAM_desc_t *WA, CHAM_desc_t *WB,
+                             RUNTIME_option_t *options )
 {
     RUNTIME_sequence_t *sequence = options->sequence;
-    cham_trans_t transA, transB;
-    int Am, An, m, n, k, p, q, KT, K, lp, lq;
-    int ldam, ldbk, ldbm, ldcm;
+    cham_trans_t transA;
+    int m, n, k, p, q, KT, K, lp, lq;
+    int ldcm;
     int tempmm, tempnn, tempkk;
     int lookahead, myp, myq;
 
     CHAMELEON_Complex64_t zbeta;
     CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
-    CHAM_desc_t WA, WB;
 
     lookahead = chamctxt->lookahead;
-    chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE,
-                         ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
-                         C->mt * C->mb, C->nb * C->q * lookahead, 0, 0,
-                         C->mt * C->mb, C->nb * C->q * lookahead, C->p, C->q,
-                         NULL, NULL, NULL );
-    chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
-                         ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
-                         C->mb * C->p * lookahead, C->nt * C->nb, 0, 0,
-                         C->mb * C->p * lookahead, C->nt * C->nb, C->p, C->q,
-                         NULL, NULL, NULL );
-
-    KT  = side == ChamLeft ? A->nt : A->mt;
-    K   = side == ChamLeft ? A->n  : A->m;
+    KT  = A->nt;
+    K   = A->n;
     myp = C->myrank / C->q;
     myq = C->myrank % C->q;
 
@@ -72,162 +61,291 @@ chameleon_pzsymm_summa( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
         lq = (k % lookahead) * C->q;
         tempkk = k == KT - 1 ? K - k * A->nb : A->nb;
         zbeta = k == 0 ? beta : zone;
-        ldbk = BLKLDD(B, k);
 
         /* Transfert ownership of the k column of A or B */
         for (m = 0; m < C->mt; m ++ ) {
-            tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
+            int Am, Ak, ldam;
+            int tempam, tempak;
 
-            if ( side == ChamLeft ) {
-                if ( (( uplo == ChamUpper ) && ( m > k )) ||
-                     (( uplo == ChamLower ) && ( m < k )) ) {
-                    Am = k;
-                    An = m;
-                }
-                else {
-                    Am = m;
-                    An = k;
-                }
-                ldam = BLKLDD(A, Am);
-
-                INSERT_TASK_zlacpy(
-                    options,
-                    ChamUpperLower, tempmm, tempkk, C->mb,
-                    A(  Am, An ),             ldam,
-                    WA( m, (k % C->q) + lq ), WA.mb );
-
-                RUNTIME_data_flush( sequence, A( Am, An ) );
+            tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
 
-                for ( q=1; q < C->q; q++ ) {
-                    INSERT_TASK_zlacpy(
-                        options,
-                        ChamUpperLower, tempmm, tempkk, C->mb,
-                        WA( m, ((k+q-1) % C->q) + lq ), WA.mb,
-                        WA( m, ((k+q)   % C->q) + lq ), WA.mb );
-                }
+            if ( (( uplo == ChamUpper ) && ( m > k )) ||
+                 (( uplo == ChamLower ) && ( m < k )) )
+            {
+                    /* Let's take A( k, m ) */
+                Am = k;
+                Ak = m;
+                tempam = tempkk;
+                tempak = tempmm;
             }
             else {
-                ldbm = BLKLDD(B, m);
+                /* Let's take A( m, k ) */
+                Am = m;
+                Ak = k;
+                tempam = tempmm;
+                tempak = tempkk;
+            }
+            ldam = BLKLDD( A, Am );
 
-                INSERT_TASK_zlacpy(
-                    options,
-                    ChamUpperLower, tempmm, tempkk, C->mb,
-                    B(  m,  k ),              ldbm,
-                    WA( m, (k % C->q) + lq ), WA.mb );
+            INSERT_TASK_zlacpy(
+                options,
+                ChamUpperLower, tempam, tempak, C->mb,
+                A( Am, Ak ),              ldam,
+                WA( m, (k % C->q) + lq ), WA->mb );
 
-                RUNTIME_data_flush( sequence, B( m, k ) );
+            RUNTIME_data_flush( sequence, A( Am, Ak ) );
 
-                for ( q=1; q < C->q; q++ ) {
-                    INSERT_TASK_zlacpy(
-                        options,
-                        ChamUpperLower, tempmm, tempkk, C->mb,
-                        WA( m, ((k+q-1) % C->q) + lq ), WA.mb,
-                        WA( m, ((k+q)   % C->q) + lq ), WA.mb );
-                }
+            for ( q=1; q < C->q; q++ ) {
+                INSERT_TASK_zlacpy(
+                    options,
+                    ChamUpperLower, tempam, tempak, C->mb,
+                    WA( m, ((k+q-1) % C->q) + lq ), WA->mb,
+                    WA( m, ((k+q)   % C->q) + lq ), WA->mb );
             }
         }
 
         /* Transfert ownership of the k row of B, or A */
         for (n = 0; n < C->nt; n++) {
+            int ldbk;
+
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+            ldbk = BLKLDD( B, k );
 
-            if ( side == ChamRight ) {
-                if ( (( uplo == ChamUpper ) && ( n < k )) ||
-                     (( uplo == ChamLower ) && ( n > k )) ) {
-                    Am = n;
-                    An = k;
-                }
-                else {
-                    Am = k;
-                    An = n;
-                }
-                ldam = BLKLDD(A, Am);
+            INSERT_TASK_zlacpy(
+                options,
+                ChamUpperLower, tempkk, tempnn, C->mb,
+                B(   k,              n ), ldbk,
+                WB( (k % C->p) + lp, n ), WB->mb );
 
+            RUNTIME_data_flush( sequence, B( k, n ) );
+
+            for ( p=1; p < C->p; p++ ) {
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempkk, tempnn, C->mb,
-                    A(   Am,            An ), ldam,
-                    WB( (k % C->p) + lp, n ), WB.mb );
+                    WB( ((k+p-1) % C->p) + lp, n ), WB->mb,
+                    WB( ((k+p)   % C->p) + lp, n ), WB->mb );
+            }
+        }
+
+        /* Perform the update of this iteration */
+        for (m = myp; m < C->mt; m+=C->p) {
+            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+            ldcm = BLKLDD(C, m);
 
-                RUNTIME_data_flush( sequence, A( Am, An ) );
+            if ( k == m ) {
+                for (n = myq; n < C->nt; n+=C->q) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                for ( p=1; p < C->p; p++ ) {
-                    INSERT_TASK_zlacpy(
-                        options,
-                        ChamUpperLower, tempkk, tempnn, C->mb,
-                        WB( ((k+p-1) % C->p) + lp, n ), WB.mb,
-                        WB( ((k+p)   % C->p) + lp, n ), WB.mb );
+                    INSERT_TASK_zsymm(
+                        options, ChamLeft, uplo,
+                        tempmm, tempnn, A->mb,
+                        alpha, WA( m,        myq + lq ), WA->mb,
+                               WB( myp + lp, n        ), WB->mb,
+                        zbeta, C(  m,        n        ), ldcm );
                 }
             }
             else {
+                if ( (( uplo == ChamUpper ) && ( m > k )) ||
+                     (( uplo == ChamLower ) && ( m < k )) )
+                {
+                    transA = ChamTrans;
+                }
+                else {
+                    transA = ChamNoTrans;
+                }
+
+                for (n = myq; n < C->nt; n+=C->q) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                    INSERT_TASK_zgemm(
+                        options, transA, ChamNoTrans,
+                        tempmm, tempnn, tempkk, A->mb,
+                        alpha, WA( m,        myq + lq ), WA->mb,
+                               WB( myp + lp, n        ), WB->mb,
+                        zbeta, C(  m,        n        ), ldcm );
+                }
+            }
+        }
+    }
+}
+
+/**
+ *  Parallel tile symmetric matrix-matrix multiplication.
+ *  SUMMA algorithm for 2D block-cyclic data distribution.
+ */
+static inline void
+chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
+                              CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
+                              CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
+                              CHAM_desc_t *WA, CHAM_desc_t *WB,
+                              RUNTIME_option_t *options )
+{
+    RUNTIME_sequence_t *sequence = options->sequence;
+    cham_trans_t transA;
+    int m, n, k, p, q, KT, K, lp, lq;
+    int ldcm;
+    int tempmm, tempnn, tempkk;
+    int lookahead, myp, myq;
+
+    CHAMELEON_Complex64_t zbeta;
+    CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
+
+    lookahead = chamctxt->lookahead;
+    KT  = A->mt;
+    K   = A->m;
+    myp = C->myrank / C->q;
+    myq = C->myrank % C->q;
+
+    for (k = 0; k < KT; k++ ) {
+        lp = (k % lookahead) * C->p;
+        lq = (k % lookahead) * C->q;
+        tempkk = k == KT - 1 ? K - k * A->nb : A->nb;
+        zbeta = k == 0 ? beta : zone;
+
+        /* Transfert ownership of the k column of A or B */
+        for (m = 0; m < C->mt; m++ ) {
+            int ldbm;
+
+            tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
+            ldbm = BLKLDD( B, m );
+
+            INSERT_TASK_zlacpy(
+                options,
+                ChamUpperLower, tempmm, tempkk, C->mb,
+                B(  m,  k ),              ldbm,
+                WA( m, (k % C->q) + lq ), WA->mb );
+
+            RUNTIME_data_flush( sequence, B( m, k ) );
+
+            for ( q=1; q < C->q; q++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempkk, tempnn, C->mb,
-                    B(   k,              n ), ldbk,
-                    WB( (k % C->p) + lp, n ), WB.mb );
+                    ChamUpperLower, tempmm, tempkk, C->mb,
+                    WA( m, ((k+q-1) % C->q) + lq ), WA->mb,
+                    WA( m, ((k+q)   % C->q) + lq ), WA->mb );
+            }
+        }
 
-                RUNTIME_data_flush( sequence, B( k, n ) );
+        /* Transfert ownership of the k row of B, or A */
+        for (n = 0; n < C->nt; n++) {
+            int Ak, An, ldak;
+            int tempak, tempan;
 
-                for ( p=1; p < C->p; p++ ) {
-                    INSERT_TASK_zlacpy(
-                        options,
-                        ChamUpperLower, tempkk, tempnn, C->mb,
-                        WB( ((k+p-1) % C->p) + lp, n ), WB.mb,
-                        WB( ((k+p)   % C->p) + lp, n ), WB.mb );
-                }
+            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+            if ( (( uplo == ChamUpper ) && ( n < k )) ||
+                 (( uplo == ChamLower ) && ( n > k )) )
+            {
+                Ak = n;
+                An = k;
+                tempak = tempnn;
+                tempan = tempkk;
+            }
+            else
+            {
+                Ak = k;
+                An = n;
+                tempak = tempkk;
+                tempan = tempnn;
+            }
+            ldak = BLKLDD( A, Ak );
+
+            INSERT_TASK_zlacpy(
+                options,
+                ChamUpperLower, tempak, tempan, C->mb,
+                A(  Ak,              An ), ldak,
+                WB( (k % C->p) + lp, n  ), WB->mb );
+
+            RUNTIME_data_flush( sequence, A( Ak, An ) );
+
+            for ( p=1; p < C->p; p++ ) {
+                INSERT_TASK_zlacpy(
+                    options,
+                    ChamUpperLower, tempak, tempan, C->mb,
+                    WB( ((k+p-1) % C->p) + lp, n ), WB->mb,
+                    WB( ((k+p)   % C->p) + lp, n ), WB->mb );
             }
         }
 
-        /*
-         *  ChamLeft / ChamLower
-         */
-        for (m = myp; m < C->mt; m+=C->p) {
-            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-            ldcm = BLKLDD(C, m);
+        /* Perform the update of this iteration */
+        for (n = myq; n < C->nt; n+=C->q) {
+            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-            for (n = myq; n < C->nt; n+=C->q) {
-                tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+            if ( k == n ) {
+                for (m = myp; m < C->mt; m+=C->p) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
 
-                if (side == ChamLeft) {
-                    transB = ChamNoTrans;
-                    if ( (( uplo == ChamUpper ) && ( m > k )) ||
-                         (( uplo == ChamLower ) && ( m < k )) ) {
-                        transA = ChamTrans;
-                    }
-                    else {
-                        transA = ChamNoTrans;
-                    }
+                    /* A has been stored in WA or WB for the summa ring */
+                    INSERT_TASK_zsymm(
+                        options, ChamRight, uplo,
+                        tempmm, tempnn, A->mb,
+                        alpha, WB( myp + lp, n        ), WB->mb,
+                               WA( m,        myq + lq ), WA->mb,
+                        zbeta, C(  m,        n        ), ldcm );
+                }
+            }
+            else {
+                if ( (( uplo == ChamUpper ) && ( n < k )) ||
+                     (( uplo == ChamLower ) && ( n > k )) )
+                {
+                    transA = ChamTrans;
                 }
                 else {
                     transA = ChamNoTrans;
-                    if ( (( uplo == ChamUpper ) && ( n < k )) ||
-                         (( uplo == ChamLower ) && ( n > k )) ) {
-                        transB = ChamTrans;
-                    }
-                    else {
-                        transB = ChamNoTrans;
-                    }
                 }
 
-                if ( k == m ) {
-                    INSERT_TASK_zsymm(
-                        options, side, uplo,
-                        tempmm, tempnn, A->mb,
-                        alpha, WA( m,        myq + lq ), WA.mb,  /* lda * Z */
-                               WB( myp + lp, n        ), WB.mb,  /* ldb * Y */
-                        zbeta, C(  m,        n        ), ldcm ); /* ldc * Y */
-                }
-                else {
+                for (m = myp; m < C->mt; m+=C->p) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
+
                     INSERT_TASK_zgemm(
-                        options, transA, transB,
+                        options, ChamNoTrans, transA,
                         tempmm, tempnn, tempkk, A->mb,
-                        alpha, WA( m,        myq + lq ), WA.mb,  /* lda * Z */
-                               WB( myp + lp, n        ), WB.mb,  /* ldb * Y */
-                        zbeta, C(  m,        n        ), ldcm ); /* ldc * Y */
+                        alpha, WA( m,        myq + lq ), WA->mb,
+                               WB( myp + lp, n        ), WB->mb,
+                        zbeta, C(  m,        n        ), ldcm );
                 }
             }
         }
     }
+}
+
+/**
+ *  Parallel tile symmetric matrix-matrix multiplication.
+ *  SUMMA algorithm for 2D block-cyclic data distribution.
+ */
+static inline void
+chameleon_pzsymm_summa( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t uplo,
+                        CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
+                        CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
+                        RUNTIME_option_t *options )
+{
+    RUNTIME_sequence_t *sequence = options->sequence;
+    CHAM_desc_t WA, WB;
+    int lookahead;
+
+    lookahead = chamctxt->lookahead;
+    chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE,
+                         ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
+                         C->mt * C->mb, C->nb * C->q * lookahead, 0, 0,
+                         C->mt * C->mb, C->nb * C->q * lookahead, C->p, C->q,
+                         NULL, NULL, NULL );
+    chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
+                         ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
+                         C->mb * C->p * lookahead, C->nt * C->nb, 0, 0,
+                         C->mb * C->p * lookahead, C->nt * C->nb, C->p, C->q,
+                         NULL, NULL, NULL );
+
+    if (side == ChamLeft) {
+        chameleon_pzsymm_summa_left( chamctxt, uplo, alpha, A, B, beta, C,
+                                     &WA, &WB, options );
+    }
+    else {
+        chameleon_pzsymm_summa_right( chamctxt, uplo, alpha, A, B, beta, C,
+                                      &WA, &WB, options );
+    }
 
     RUNTIME_desc_flush( &WA, sequence );
     RUNTIME_desc_flush( &WB, sequence );
@@ -453,7 +571,8 @@ chameleon_pzsymm( cham_side_t side, cham_uplo_t uplo,
     {
         chameleon_pzsymm_summa(   chamctxt, side, uplo, alpha, A, B, beta, C, &options );
     }
-    else {
+    else
+    {
         chameleon_pzsymm_generic( chamctxt, side, uplo, alpha, A, B, beta, C, &options );
     }
 
diff --git a/compute/pzsyr2k.c b/compute/pzsyr2k.c
index 17ae3ee8bbaad7f560b9f80d2d0bb0b53789a7a5..0b5f7195f398bd8319a38868cc6ac8ac7de92d55 100644
--- a/compute/pzsyr2k.c
+++ b/compute/pzsyr2k.c
@@ -29,15 +29,15 @@
 /**
  *  Parallel tile Hermitian rank-k update - dynamic scheduling
  */
-void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans,
-                          CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
-                          CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
-                          RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
+void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
+                        CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
+                        CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
+                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
 {
     CHAM_context_t *chamctxt;
     RUNTIME_option_t options;
 
-    int m, n, k;
+    int m, n, k, mmin, mmax;
     int ldak, ldam, ldan, ldcm, ldcn;
     int ldbk, ldbm, ldbn;
     int tempnn, tempmm, tempkn, tempkm;
@@ -56,6 +56,16 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans,
         ldan = BLKLDD(A, n);
         ldbn = BLKLDD(B, n);
         ldcn = BLKLDD(C, n);
+
+        if (uplo == ChamLower) {
+            mmin = n+1;
+            mmax = C->mt;
+        }
+        else {
+            mmin = 0;
+            mmax = n;
+        }
+
         /*
          *  ChamNoTrans
          */
@@ -71,68 +81,34 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans,
                            B(n, k), ldbn,
                     zbeta, C(n, n), ldcn); /* ldc  * N */
             }
-            /*
-             *  ChamNoTrans / ChamLower
-             */
-            if (uplo == ChamLower) {
-                for (m = n+1; m < C->mt; m++) {
-                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
-                    ldcm = BLKLDD(C, m);
-                    for (k = 0; k < A->nt; k++) {
-                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                        zbeta = k == 0 ? beta : zone;
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamTrans,
-                            tempmm, tempnn, tempkn, A->mb,
-                            alpha, A(m, k), ldam,  /* ldam * K */
-                                   B(n, k), ldbn,  /* ldan * K */
-                            zbeta, C(m, n), ldcm); /* ldc  * N */
+            for (m = mmin; m < mmax; m++) {
+                tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                ldam = BLKLDD(A, m);
+                ldbm = BLKLDD(B, m);
+                ldcm = BLKLDD(C, m);
+                for (k = 0; k < A->nt; k++) {
+                    tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+                    zbeta = k == 0 ? beta : zone;
+                    INSERT_TASK_zgemm(
+                        &options,
+                        ChamNoTrans, ChamTrans,
+                        tempmm, tempnn, tempkn, A->mb,
+                        alpha, A(m, k), ldam,
+                               B(n, k), ldbn,
+                        zbeta, C(m, n), ldcm);
 
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamTrans,
-                            tempmm, tempnn, tempkn, A->mb,
-                            alpha, B(m, k), ldbm,  /* ldam * K */
-                                   A(n, k), ldan,  /* ldan * K */
-                            zone,  C(m, n), ldcm); /* ldc  * N */
-                    }
-                }
-            }
-            /*
-             *  ChamNoTrans / ChamUpper
-             */
-            else {
-                for (m = n+1; m < C->mt; m++) {
-                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
-                    for (k = 0; k < A->nt; k++) {
-                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                        zbeta = k == 0 ? beta : zone;
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamTrans,
-                            tempnn, tempmm, tempkn, A->mb,
-                            alpha, A(n, k), ldan,  /* ldan * K */
-                                   B(m, k), ldbm,  /* ldam * M */
-                            zbeta, C(n, m), ldcn); /* ldc  * M */
-
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamTrans,
-                            tempnn, tempmm, tempkn, A->mb,
-                            alpha, B(n, k), ldan,  /* ldan * K */
-                                   A(m, k), ldam,  /* ldam * M */
-                            zone,  C(n, m), ldcn); /* ldc  * M */
-                    }
+                    INSERT_TASK_zgemm(
+                        &options,
+                        ChamNoTrans, ChamTrans,
+                        tempmm, tempnn, tempkn, A->mb,
+                        alpha, B(m, k), ldbm,
+                               A(n, k), ldan,
+                        zone,  C(m, n), ldcm);
                 }
             }
         }
         /*
-         *  Cham[Conj]Trans
+         *  ChamTrans
          */
         else {
             for (k = 0; k < A->mt; k++) {
@@ -148,63 +124,29 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans,
                            B(k, n), ldbk,
                     zbeta, C(n, n), ldcn); /* ldc * N */
             }
-            /*
-             *  Cham[Conj]Trans / ChamLower
-             */
-            if (uplo == ChamLower) {
-                for (m = n+1; m < C->mt; m++) {
-                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
-                    for (k = 0; k < A->mt; k++) {
-                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
-                        zbeta = k == 0 ? beta : zone;
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamNoTrans,
-                            tempmm, tempnn, tempkm, A->mb,
-                            alpha, A(k, m), ldak,  /* lda * M */
-                                   B(k, n), ldbk,  /* lda * N */
-                            zbeta, C(m, n), ldcm); /* ldc * N */
-
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamNoTrans,
-                            tempmm, tempnn, tempkm, A->mb,
-                            alpha, B(k, m), ldbk,  /* lda * M */
-                                   A(k, n), ldak,  /* lda * N */
-                            zone,  C(m, n), ldcm); /* ldc * N */
-                    }
-                }
-            }
-            /*
-             *  Cham[Conj]Trans / ChamUpper
-             */
-            else {
-                for (m = n+1; m < C->mt; m++) {
-                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    for (k = 0; k < A->mt; k++) {
-                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
-                        zbeta = k == 0 ? beta : zone;
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamNoTrans,
-                            tempnn, tempmm, tempkm, A->mb,
-                            alpha, A(k, n), ldak,  /* lda * K */
-                                   B(k, m), ldbk,  /* lda * M */
-                            zbeta, C(n, m), ldcn); /* ldc * M */
+            for (m = mmin; m < mmax; m++) {
+                tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                ldcm = BLKLDD(C, m);
+                for (k = 0; k < A->mt; k++) {
+                    tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+                    ldak = BLKLDD(A, k);
+                    ldbk = BLKLDD(B, k);
+                    zbeta = k == 0 ? beta : zone;
+                    INSERT_TASK_zgemm(
+                        &options,
+                        ChamTrans, ChamNoTrans,
+                        tempmm, tempnn, tempkm, A->mb,
+                        alpha, A(k, m), ldak,
+                               B(k, n), ldbk,
+                        zbeta, C(m, n), ldcm);
 
-                        INSERT_TASK_zgemm(
-                            &options,
-                            trans, ChamNoTrans,
-                            tempnn, tempmm, tempkm, A->mb,
-                            alpha, B(k, n), ldbk,  /* lda * K */
-                                   A(k, m), ldak,  /* lda * M */
-                            zone,  C(n, m), ldcn); /* ldc * M */
-                    }
+                    INSERT_TASK_zgemm(
+                        &options,
+                        ChamTrans, ChamNoTrans,
+                        tempmm, tempnn, tempkm, A->mb,
+                        alpha, B(k, m), ldbk,
+                               A(k, n), ldak,
+                        zone,  C(m, n), ldcm );
                 }
             }
         }