Mentions légales du service

Skip to content
Snippets Groups Projects
pzsymm.c 30.80 KiB
/**
 *
 * @file pzsymm.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zsymm parallel algorithm
 *
 * @version 1.3.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 0.9.2
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @author Florent Pruvost
 * @author Alycia Lisito
 * @author Romain Peressoni
 * @author Pierre Esterie
 * @date 2025-01-24
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"

/*
 * Tile accessors: X( m, n ) expands to the comma-separated argument triple
 * "X, (m), (n)" (descriptor followed by two parenthesised tile indices), so
 * that a tile can be handed to a call with a single, readable token.
 * They only expand when the name is directly followed by a parenthesis; a bare
 * A, B, C, WA or WB still designates the descriptor pointer itself.
 */
#define A(  _m_, _n_ ) A,  (_m_), (_n_)
#define B(  _m_, _n_ ) B,  (_m_), (_n_)
#define C(  _m_, _n_ ) C,  (_m_), (_n_)
#define WA( _m_, _n_ ) WA, (_m_), (_n_)
#define WB( _m_, _n_ ) WB, (_m_), (_n_)

/**
 *  Tells whether the local rank is involved in the computation of the tile
 *  C(m, n) by the stationary-A algorithm.
 *
 *  The rank is involved when it owns C(m, n) or, failing that, when it owns at
 *  least one of the tiles of the triangle of A selected by uplo that are
 *  multiplied into C(m, n). The search stops at the first owned tile.
 *
 * @param[in] side    ChamLeft to walk the tiles of A attached to index m, any
 *                    other value to walk those attached to index n.
 * @param[in] uplo    ChamLower to read the lower triangle of A, any other
 *                    value to read the upper one.
 * @param[in] A       Descriptor of the symmetric matrix.
 * @param[in] C       Descriptor of the result matrix.
 * @param[in] m       First tile index of the tile of C.
 * @param[in] n       Second tile index of the tile of C.
 * @param[in] myrank  Rank to compare the tile owners with.
 *
 * @return 1 when the rank is involved, 0 otherwise.
 */
static inline int
chameleon_pzsymm_Astat_islocal( cham_side_t side, cham_uplo_t uplo,
                                CHAM_desc_t *A, CHAM_desc_t *C,
                                int m, int n, int myrank )
{
    int k, l, Am, An;

    /* The node owns the C tile. */
    if ( C->get_rankof( C(m, n) ) == myrank ) {
        return 1;
    }

    /* Select row or column based on side */
    l = ( side == ChamLeft ) ? m : n;

    /*
     * The node owns an A tile that will define the locality of the
     * computations. Tile (l, k) is always read from the stored triangle.
     */
    for (k = 0; k < A->mt; k++) {
        Am = k;
        An = k;

        if ( uplo == ChamLower ) {
            if (k < l) {
                Am = l;
            }
            else if (k > l) {
                An = l;
            }
        }
        else {
            if (k < l) {
                An = l;
            }
            else if (k > l) {
                Am = l;
            }
        }

        if ( A->get_rankof( A( Am, An ) ) == myrank ) {
            return 1;
        }
    }

    return 0;
}

/**
 *  Parallel tile symmetric matrix-matrix multiplication.
 *  Generic algorithm for any data distribution with a stationary A.
 *
 *  Assumes A has been set up with a proper get_rankof function to account for
 *  symmetry.
 *
 *  The submission is done in two passes over the tiles of C:
 *    -# every tile in which the local rank is involved (see
 *       chameleon_pzsymm_Astat_islocal()) is registered with
 *       RUNTIME_zgersum_set_methods() and flagged in a local table;
 *    -# every tile is scaled by beta with options->forcesub cleared, then all
 *       the contributions are accumulated into it with a unit beta through the
 *       *_Astat task variants while options->forcesub holds the flag of the
 *       tile. The tile is finally given to RUNTIME_zgersum_submit_tree() and
 *       flushed.
 *
 *  If the table of flags cannot be allocated, the flag of each tile is
 *  recomputed when it is needed instead of being read from the table.
 *
 * @param[in]     chamctxt  Chameleon context, used to get the local rank.
 * @param[in]     side      ChamLeft for A on the left of B, any other value
 *                          for A on the right.
 * @param[in]     uplo      ChamLower to read the lower triangle of A, any
 *                          other value to read the upper one.
 * @param[in]     alpha     Scalar applied to the product.
 * @param[in]     A         Descriptor of the symmetric matrix.
 * @param[in]     B         Descriptor of the matrix multiplied with A.
 * @param[in]     beta      Scalar applied to C before the accumulation.
 * @param[in,out] C         Descriptor of the result matrix.
 * @param[in,out] options   Runtime options. The forcesub field is modified
 *                          during the submission and reset to 0 on exit.
 */
static inline void
chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t uplo,
                        CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                        CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
                        RUNTIME_option_t *options )
{
    const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
    RUNTIME_sequence_t *sequence = options->sequence;
    int                 k, m, n;
    int                 tempmm, tempnn, tempkn, tempkm;
    int                 myrank = RUNTIME_comm_rank( chamctxt );
    /*
     * Cache of the per-tile flags. It may be NULL (allocation failure, or
     * empty C): every access below is guarded, and the flag is recomputed on
     * the fly in that case.
     */
    int8_t             *reduceC = calloc( C->mt * C->nt, sizeof(int8_t) );

    /* Set C tiles to redux mode */
    for (n = 0; n < C->nt; n++) {
        for (m = 0; m < C->mt; m++) {
            if ( !chameleon_pzsymm_Astat_islocal( side, uplo, A, C, m, n, myrank ) ) {
                continue;
            }

            if ( reduceC != NULL ) {
                reduceC[ n * C->mt + m ] = 1;
            }
            RUNTIME_zgersum_set_methods( C(m, n) );
        }
    }

    for(n = 0; n < C->nt; n++) {
        tempnn = C->get_blkdim( C, n, DIM_n, C->n );
        for(m = 0; m < C->mt; m++) {
            tempmm = C->get_blkdim( C, m, DIM_m, C->m );

            /* Scale C */
            options->forcesub = 0;
            INSERT_TASK_zlascal( options, ChamUpperLower, tempmm, tempnn, C->mb,
                                 beta, C, m, n );

            /*
             * Without the cache, recompute the flag. This assumes get_rankof
             * gives the same answer as during the first pass.
             */
            options->forcesub = ( reduceC != NULL )
                ? reduceC[ n * C->mt + m ]
                : chameleon_pzsymm_Astat_islocal( side, uplo, A, C, m, n, myrank );

            /*
             *  ChamLeft / ChamLower
             */
            if (side == ChamLeft) {
                if (uplo == ChamLower) {
                    for (k = 0; k < C->mt; k++) {
                        tempkm = C->get_blkdim( C, k, DIM_m, C->m );

                        if (k < m) {
                            INSERT_TASK_zgemm_Astat(
                                options,
                                ChamNoTrans, ChamNoTrans,
                                tempmm, tempnn, tempkm, A->mb,
                                alpha, A(m, k),
                                       B(k, n),
                                zone,  C(m, n));
                        }
                        else if (k == m) {
                            INSERT_TASK_zsymm_Astat(
                                options,
                                side, uplo,
                                tempmm, tempnn, A->mb,
                                alpha, A(k, k),
                                       B(k, n),
                                zone,  C(m, n));
                        }
                        else {
                            /* A(m, k) is not stored: use the transpose of A(k, m) */
                            INSERT_TASK_zgemm_Astat(
                                options,
                                ChamTrans, ChamNoTrans,
                                tempmm, tempnn, tempkm, A->mb,
                                alpha, A(k, m),
                                       B(k, n),
                                zone,  C(m, n));
                        }
                    }
                }
                /*
                 *  ChamLeft / ChamUpper
                 */
                else {
                    for (k = 0; k < C->mt; k++) {
                        tempkm = C->get_blkdim( C, k, DIM_m, C->m );

                        if (k < m) {
                            /* A(m, k) is not stored: use the transpose of A(k, m) */
                            INSERT_TASK_zgemm_Astat(
                                options,
                                ChamTrans, ChamNoTrans,
                                tempmm, tempnn, tempkm, A->mb,
                                alpha, A(k, m),
                                       B(k, n),
                                zone,  C(m, n));
                        }
                        else if (k == m) {
                            INSERT_TASK_zsymm_Astat(
                                options,
                                side, uplo,
                                tempmm, tempnn, A->mb,
                                alpha, A(k, k),
                                       B(k, n),
                                zone,  C(m, n));
                        }
                        else {
                            INSERT_TASK_zgemm_Astat(
                                options,
                                ChamNoTrans, ChamNoTrans,
                                tempmm, tempnn, tempkm, A->mb,
                                alpha, A(m, k),
                                       B(k, n),
                                zone,  C(m, n));
                        }
                    }
                }
            }
            /*
             *  ChamRight / ChamLower
             */
            else {
                if (uplo == ChamLower) {
                    for (k = 0; k < C->nt; k++) {
                        tempkn = C->get_blkdim( C, k, DIM_n, C->n );

                        if (k < n) {
                            /* A(k, n) is not stored: use the transpose of A(n, k) */
                            INSERT_TASK_zgemm_Astat(
                                options,
                                ChamNoTrans, ChamTrans,
                                tempmm, tempnn, tempkn, A->mb,
                                alpha, B(m, k),
                                       A(n, k),
                                zone,  C(m, n));
                        }
                        else if (k == n) {
                            INSERT_TASK_zsymm_Astat(
                                options,
                                side, uplo,
                                tempmm, tempnn, A->mb,
                                alpha, A(k, k),
                                       B(m, k),
                                zone,  C(m, n));
                        }
                        else {
                            INSERT_TASK_zgemm_Astat(
                                options,
                                ChamNoTrans, ChamNoTrans,
                                tempmm, tempnn, tempkn, A->mb,
                                alpha, B(m, k),
                                       A(k, n),
                                zone,  C(m, n));
                        }
                    }
                }
                /*
                 *  ChamRight / ChamUpper
                 */
                else {
                    for (k = 0; k < C->nt; k++) {
                        tempkn = C->get_blkdim( C, k, DIM_n, C->n );

                        if (k < n) {
                            INSERT_TASK_zgemm_Astat(
                                options,
                                ChamNoTrans, ChamNoTrans,
                                tempmm, tempnn, tempkn, A->mb,
                                alpha, B(m, k),
                                       A(k, n),
                                zone,  C(m, n));
                        }
                        else if (k == n) {
                            INSERT_TASK_zsymm_Astat(
                                options,
                                side, uplo,
                                tempmm, tempnn, A->mb,
                                alpha, A(k, k),
                                       B(m, k),
                                zone,  C(m, n));
                        }
                        else {
                            /* A(k, n) is not stored: use the transpose of A(n, k) */
                            INSERT_TASK_zgemm_Astat(
                                options,
                                ChamNoTrans, ChamTrans,
                                tempmm, tempnn, tempkn, A->mb,
                                alpha, B(m, k),
                                       A(n, k),
                                zone,  C(m, n));
                        }
                    }
                }
            }

            RUNTIME_zgersum_submit_tree( options, C(m, n) );
            RUNTIME_data_flush( sequence, C(m, n) );
        }
    }
    options->forcesub = 0;
    free( reduceC );

    (void)chamctxt;
}


/**
 *  Parallel tile symmetric matrix-matrix multiplication, A on the left side.
 *  SUMMA algorithm for 2D block-cyclic data distribution.
 *
 *  Each step k handles one tile column of A and one tile row of B:
 *    - for every tile row m of C, the tile of A taken from the triangle
 *      selected by uplo is copied into WA, then copied from one slot of WA to
 *      the next;
 *    - for every tile column n of C, B(k, n) is copied into WB, then copied
 *      from one slot of WB to the next;
 *    - the tiles of C indexed from (myp, myq) with the strides of the
 *      distribution are updated from WA and WB.
 *  beta is applied at the first step only; later steps accumulate.
 */
static inline void
chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                             CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                             CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
                             CHAM_desc_t *WA, CHAM_desc_t *WB,
                             RUNTIME_option_t *options )
{
    const CHAMELEON_Complex64_t one       = (CHAMELEON_Complex64_t)1.0;
    RUNTIME_sequence_t         *sequence  = options->sequence;
    const int                   lookahead = chamctxt->lookahead;
    const int                   nbsteps   = A->nt;
    /* Distribution parameters, assumed constant during the submission */
    const int                   distP     = chameleon_desc_datadist_get_iparam(C, 0);
    const int                   distQ     = chameleon_desc_datadist_get_iparam(C, 1);
    const int                   myp       = C->myrank / distQ;
    const int                   myq       = C->myrank % distQ;
    int                         step, i, j, hop;

    for (step = 0; step < nbsteps; step++) {
        /* Offsets of the workspace slots used by this step */
        const int                   wboff = (step % lookahead) * distP;
        const int                   waoff = (step % lookahead) * distQ;
        const int                   kdim  = A->get_blkdim( A, step, DIM_n, A->n );
        const CHAMELEON_Complex64_t cbeta = ( step == 0 ) ? beta : one;

        /* Transfer ownership of the k column of A */
        for (i = 0; i < C->mt; i++) {
            const int mdim = C->get_blkdim( C, i, DIM_m, C->m );
            /* Non-zero when A( step, i ) must be taken instead of A( i, step ) */
            const int swapped = ( ( uplo == ChamUpper ) && ( i > step ) ) ||
                                ( ( uplo == ChamLower ) && ( i < step ) );
            const int Arow = swapped ? step : i;
            const int Acol = swapped ? i    : step;
            const int rows = swapped ? kdim : mdim;
            const int cols = swapped ? mdim : kdim;

            INSERT_TASK_zlacpy( options, ChamUpperLower, rows, cols,
                                A(  Arow, Acol ),
                                WA( i,    (Acol % distQ) + waoff ) );

            RUNTIME_data_flush( sequence, A( Arow, Acol ) );

            for (hop = 1; hop < distQ; hop++) {
                INSERT_TASK_zlacpy( options, ChamUpperLower, rows, cols,
                                    WA( i, ((Acol + hop - 1) % distQ) + waoff ),
                                    WA( i, ((Acol + hop)     % distQ) + waoff ) );
            }
        }

        /* Transfer ownership of the k row of B */
        for (j = 0; j < C->nt; j++) {
            const int ndim = C->get_blkdim( C, j, DIM_n, C->n );

            INSERT_TASK_zlacpy( options, ChamUpperLower, kdim, ndim,
                                B(  step,                    j ),
                                WB( (step % distP) + wboff,  j ) );

            RUNTIME_data_flush( sequence, B( step, j ) );

            for (hop = 1; hop < distP; hop++) {
                INSERT_TASK_zlacpy( options, ChamUpperLower, kdim, ndim,
                                    WB( ((step + hop - 1) % distP) + wboff, j ),
                                    WB( ((step + hop)     % distP) + wboff, j ) );
            }
        }

        /* Perform the update of this iteration */
        for (i = myp; i < C->mt; i += distP) {
            const int mdim = C->get_blkdim( C, i, DIM_m, C->m );
            /* The tile stored in WA is transposed when it came from A( step, i ) */
            const cham_trans_t transA =
                ( ( ( uplo == ChamUpper ) && ( i > step ) ) ||
                  ( ( uplo == ChamLower ) && ( i < step ) ) ) ? ChamTrans : ChamNoTrans;

            for (j = myq; j < C->nt; j += distQ) {
                const int ndim = C->get_blkdim( C, j, DIM_n, C->n );

                if ( step == i ) {
                    /* Diagonal tile of A: symmetric kernel */
                    INSERT_TASK_zsymm( options, ChamLeft, uplo,
                                       mdim, ndim, A->mb,
                                       alpha, WA( i,           myq + waoff ),
                                              WB( myp + wboff, j           ),
                                       cbeta, C(  i,           j           ) );
                }
                else {
                    INSERT_TASK_zgemm( options, transA, ChamNoTrans,
                                       mdim, ndim, kdim, A->mb,
                                       alpha, WA( i,           myq + waoff ),
                                              WB( myp + wboff, j           ),
                                       cbeta, C(  i,           j           ) );
                }
            }
        }
    }
}

/**
 *  Parallel tile symmetric matrix-matrix multiplication, A on the right side.
 *  SUMMA algorithm for 2D block-cyclic data distribution.
 *
 *  Each step k handles one tile column of B and one tile row of A:
 *    - for every tile row m of C, B(m, k) is copied into WA, then copied from
 *      one slot of WA to the next;
 *    - for every tile column n of C, the tile of A taken from the triangle
 *      selected by uplo is copied into WB, then copied from one slot of WB to
 *      the next;
 *    - the tiles of C indexed from (myp, myq) with the strides of the
 *      distribution are updated from WA and WB.
 *  beta is applied at the first step only; later steps accumulate.
 */
static inline void
chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                              CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                              CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
                              CHAM_desc_t *WA, CHAM_desc_t *WB,
                              RUNTIME_option_t *options )
{
    const CHAMELEON_Complex64_t one       = (CHAMELEON_Complex64_t)1.0;
    RUNTIME_sequence_t         *sequence  = options->sequence;
    const int                   lookahead = chamctxt->lookahead;
    const int                   nbsteps   = A->mt;
    /* Distribution parameters, assumed constant during the submission */
    const int                   distP     = chameleon_desc_datadist_get_iparam(C, 0);
    const int                   distQ     = chameleon_desc_datadist_get_iparam(C, 1);
    const int                   myp       = C->myrank / distQ;
    const int                   myq       = C->myrank % distQ;
    int                         step, i, j, hop;

    for (step = 0; step < nbsteps; step++) {
        /* Offsets of the workspace slots used by this step */
        const int                   wboff = (step % lookahead) * distP;
        const int                   waoff = (step % lookahead) * distQ;
        const int                   kdim  = A->get_blkdim( A, step, DIM_m, A->m );
        const CHAMELEON_Complex64_t cbeta = ( step == 0 ) ? beta : one;

        /* Transfer ownership of the k column of B */
        for (i = 0; i < C->mt; i++) {
            const int mdim = C->get_blkdim( C, i, DIM_m, C->m );

            INSERT_TASK_zlacpy( options, ChamUpperLower, mdim, kdim,
                                B(  i, step ),
                                WA( i, (step % distQ) + waoff ) );

            RUNTIME_data_flush( sequence, B( i, step ) );

            for (hop = 1; hop < distQ; hop++) {
                INSERT_TASK_zlacpy( options, ChamUpperLower, mdim, kdim,
                                    WA( i, ((step + hop - 1) % distQ) + waoff ),
                                    WA( i, ((step + hop)     % distQ) + waoff ) );
            }
        }

        /* Transfer ownership of the k row of A */
        for (j = 0; j < C->nt; j++) {
            const int ndim = C->get_blkdim( C, j, DIM_n, C->n );
            /* Non-zero when A( j, step ) must be taken instead of A( step, j ) */
            const int swapped = ( ( uplo == ChamUpper ) && ( j < step ) ) ||
                                ( ( uplo == ChamLower ) && ( j > step ) );
            const int Arow = swapped ? j    : step;
            const int Acol = swapped ? step : j;
            const int rows = swapped ? ndim : kdim;
            const int cols = swapped ? kdim : ndim;

            INSERT_TASK_zlacpy( options, ChamUpperLower, rows, cols,
                                A(  Arow,                    Acol ),
                                WB( (Arow % distP) + wboff,  j    ) );

            RUNTIME_data_flush( sequence, A( Arow, Acol ) );

            for (hop = 1; hop < distP; hop++) {
                INSERT_TASK_zlacpy( options, ChamUpperLower, rows, cols,
                                    WB( ((Arow + hop - 1) % distP) + wboff, j ),
                                    WB( ((Arow + hop)     % distP) + wboff, j ) );
            }
        }

        /* Perform the update of this iteration */
        for (j = myq; j < C->nt; j += distQ) {
            const int ndim = C->get_blkdim( C, j, DIM_n, C->n );
            /* The tile stored in WB is transposed when it came from A( j, step ) */
            const cham_trans_t transA =
                ( ( ( uplo == ChamUpper ) && ( j < step ) ) ||
                  ( ( uplo == ChamLower ) && ( j > step ) ) ) ? ChamTrans : ChamNoTrans;

            for (i = myp; i < C->mt; i += distP) {
                const int mdim = C->get_blkdim( C, i, DIM_m, C->m );

                if ( step == j ) {
                    /* Diagonal tile of A, stored in WB for the summa ring */
                    INSERT_TASK_zsymm( options, ChamRight, uplo,
                                       mdim, ndim, A->mb,
                                       alpha, WB( myp + wboff, j           ),
                                              WA( i,           myq + waoff ),
                                       cbeta, C(  i,           j           ) );
                }
                else {
                    INSERT_TASK_zgemm( options, ChamNoTrans, transA,
                                       mdim, ndim, kdim, A->mb,
                                       alpha, WA( i,           myq + waoff ),
                                              WB( myp + wboff, j           ),
                                       cbeta, C(  i,           j           ) );
                }
            }
        }
    }
}

/**
 *  Parallel tile symmetric matrix-matrix multiplication.
 *  SUMMA algorithm for 2D block-cyclic data distribution.
 *
 *  Dispatches to the left or right variant according to side (anything other
 *  than ChamLeft is handled as the right side), then flushes both workspace
 *  descriptors on the sequence of the options.
 */
static inline void
chameleon_pzsymm_summa( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t uplo,
                        CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                        CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
                        CHAM_desc_t *WA, CHAM_desc_t *WB,
                        RUNTIME_option_t *options )
{
    /* Read before the submission, as the flushes below must use this sequence */
    RUNTIME_sequence_t *flushseq = options->sequence;

    if ( side != ChamLeft ) {
        chameleon_pzsymm_summa_right( chamctxt, uplo, alpha, A, B, beta, C,
                                      WA, WB, options );
    }
    else {
        chameleon_pzsymm_summa_left( chamctxt, uplo, alpha, A, B, beta, C,
                                     WA, WB, options );
    }

    /* The workspaces are no longer needed once all the steps are submitted */
    CHAMELEON_Desc_Flush( WA, flushseq );
    CHAMELEON_Desc_Flush( WB, flushseq );
}

/**
 *  Parallel tile symmetric matrix-matrix multiplication.
 *  Generic algorithm for any data distribution.
 *
 *  For each tile C(m, n), one task is submitted per step k: a zsymm on the
 *  diagonal tile A(k, k), a zgemm otherwise. Only the triangle of A selected
 *  by uplo is read (any value other than ChamLower is handled as upper); when
 *  the needed tile lies in the other triangle, its mirror tile is used with
 *  ChamTrans. beta is applied by the first task, the following ones
 *  accumulate.
 */
static inline void
chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t uplo,
                          CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                          CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
                          RUNTIME_option_t *options )
{
    const CHAMELEON_Complex64_t one = (CHAMELEON_Complex64_t)1.0;
    int i, j, step;

    for (i = 0; i < C->mt; i++) {
        const int mdim = C->get_blkdim( C, i, DIM_m, C->m );

        for (j = 0; j < C->nt; j++) {
            const int ndim = C->get_blkdim( C, j, DIM_n, C->n );

            if ( side == ChamLeft ) {
                /*
                 *  ChamLeft: C(i, j) += alpha * A(i, step) * B(step, j)
                 */
                for (step = 0; step < C->mt; step++) {
                    const int                   kdim  = C->get_blkdim( C, step, DIM_m, C->m );
                    const CHAMELEON_Complex64_t cbeta = ( step == 0 ) ? beta : one;

                    if ( step == i ) {
                        INSERT_TASK_zsymm( options, side, uplo,
                                           mdim, ndim, A->mb,
                                           alpha, A(step, step),
                                                  B(step, j),
                                           cbeta, C(i, j) );
                        continue;
                    }

                    /* Is A(i, step) in the triangle that is read? */
                    if ( ( uplo == ChamLower ) ? ( step < i ) : ( step > i ) ) {
                        INSERT_TASK_zgemm( options, ChamNoTrans, ChamNoTrans,
                                           mdim, ndim, kdim, A->mb,
                                           alpha, A(i, step),
                                                  B(step, j),
                                           cbeta, C(i, j) );
                    }
                    else {
                        INSERT_TASK_zgemm( options, ChamTrans, ChamNoTrans,
                                           mdim, ndim, kdim, A->mb,
                                           alpha, A(step, i),
                                                  B(step, j),
                                           cbeta, C(i, j) );
                    }
                }
            }
            else {
                /*
                 *  ChamRight: C(i, j) += alpha * B(i, step) * A(step, j)
                 */
                for (step = 0; step < C->nt; step++) {
                    const int                   kdim  = C->get_blkdim( C, step, DIM_n, C->n );
                    const CHAMELEON_Complex64_t cbeta = ( step == 0 ) ? beta : one;

                    if ( step == j ) {
                        INSERT_TASK_zsymm( options, side, uplo,
                                           mdim, ndim, A->mb,
                                           alpha, A(step, step),
                                                  B(i, step),
                                           cbeta, C(i, j) );
                        continue;
                    }

                    /* Is A(step, j) in the triangle that is read? */
                    if ( ( uplo == ChamLower ) ? ( step > j ) : ( step < j ) ) {
                        INSERT_TASK_zgemm( options, ChamNoTrans, ChamNoTrans,
                                           mdim, ndim, kdim, A->mb,
                                           alpha, B(i, step),
                                                  A(step, j),
                                           cbeta, C(i, j) );
                    }
                    else {
                        INSERT_TASK_zgemm( options, ChamNoTrans, ChamTrans,
                                           mdim, ndim, kdim, A->mb,
                                           alpha, B(i, step),
                                                  A(j, step),
                                           cbeta, C(i, j) );
                    }
                }
            }
        }
    }
    (void)chamctxt;
}

/**
 *  Parallel tile symmetric matrix-matrix multiplication. wrapper.
 *
 *  Selects the algorithm from the workspace (generic when ws is NULL) and
 *  submits the tasks on the given sequence. Nothing is submitted when the
 *  sequence is not in the CHAMELEON_SUCCESS state.
 *
 *    - ChamGemmAlgSummaC runs the SUMMA variant with the workspaces of ws;
 *    - ChamGemmAlgSummaA runs the stationary-A variant for ChamLeft only, and
 *      the generic variant for the other side;
 *    - ChamGemmAlgAuto, ChamGemmAlgGeneric and ChamGemmAlgSummaB (which does
 *      not exist yet) run the generic variant.
 */
void
chameleon_pzsymm( struct chameleon_pzgemm_s *ws,
                  cham_side_t side, cham_uplo_t uplo,
                  CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                  CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
                  RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
    RUNTIME_option_t options;
    cham_gemm_t      alg      = ChamGemmAlgGeneric;
    CHAM_context_t  *chamctxt;

    if ( ws != NULL ) {
        alg = ws->alg;
    }

    chamctxt = chameleon_context_self();
    if ( sequence->status != CHAMELEON_SUCCESS ) {
        return;
    }
    RUNTIME_options_init( &options, chamctxt, sequence, request );

    if ( alg == ChamGemmAlgSummaC ) {
        chameleon_pzsymm_summa( chamctxt, side, uplo, alpha, A, B, beta, C,
                                &(ws->WA), &(ws->WB), &options );
    }
    else if ( ( alg == ChamGemmAlgSummaA ) && ( side == ChamLeft ) ) {
        chameleon_pzsymm_Astat( chamctxt, side, uplo, alpha, A, B, beta, C, &options );
    }
    else if ( ( alg == ChamGemmAlgAuto    ) ||
              ( alg == ChamGemmAlgSummaB  ) || /* Switch back to generic since it does not exist yet. */
              ( alg == ChamGemmAlgGeneric ) ||
              ( alg == ChamGemmAlgSummaA  ) )  /* Right side: no stationary-A variant is used. */
    {
        chameleon_pzsymm_generic( chamctxt, side, uplo, alpha, A, B, beta, C, &options );
    }
    /* Any other value of alg submits nothing. */

    RUNTIME_options_finalize( &options, chamctxt );
}