/** * * @copyright (c) 2009-2014 The University of Tennessee and The University * of Tennessee Research Foundation. * All rights reserved. * @copyright (c) 2012-2014 Inria. All rights reserved. * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. * **/ /** * * @file pzlansy.c * * MORSE auxiliary routines * MORSE is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI, * University of Bordeaux, Bordeaux INP * * @version 2.6.0 * @comment This file has been automatically generated * from Plasma 2.6.0 for MORSE 1.0.0 * @author Emmanuel Agullo * @author Mathieu Faverge * @date 2010-11-15 * @precisions normal z -> c d s * **/ //ALLOC_WS : A->mb #include <stdlib.h> #include <math.h> //WS_ADD : A->mb #include "control/common.h" #define A(m, n) A, m, n #define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n #define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n #define RESULT(m, n) RESULT, m, n /******************************************************************************* * **/ /******************************************************************************* * **/ void morse_pzlansy(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_desc_t *VECNORMS_STEP1 = NULL; MORSE_desc_t *VECNORMS_STEP2 = NULL; MORSE_desc_t *RESULT = NULL; MORSE_context_t *morse; MORSE_option_t options; int workm, workn; int tempkm, tempkn; int ldam; int m, n; /* int part_p, part_q; */ /* part_p = A->myrank / A->q; */ /* part_q = A->myrank % A->q; */ morse = morse_context_self(); if (sequence->status != MORSE_SUCCESS) return; RUNTIME_options_init(&options, morse, sequence, request); *result = 0.0; switch ( norm ) { /* * MorseOneNorm / MorseInfNorm */ case MorseOneNorm: case MorseInfNorm: /* Init workspace handle for the call to zlange */ RUNTIME_options_ws_alloc( &options, A->mb, 0 ); workm = A->m; workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, A->mb, 1, A->mb, workm, workn, 0, 0, workm, workn, A->p, A->q); MORSE_Desc_Create(&(VECNORMS_STEP2), NULL, MorseRealDouble, A->mb, 1, A->mb, workm, 1, 0, 0, workm, 1, A->p, A->q); MORSE_Desc_Create(&(RESULT), NULL, MorseRealDouble, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1); /* Zeroes my intermediate vectors */ for(m = 0; m < A->mt; m++) { tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; for(n = 0; n < workn; n++) { MORSE_TASK_dlaset( &options, MorseUpperLower, tempkm, 1, 0., 0., VECNORMS_STEP1(m, n), 1); } } for(m = 0; m < A->mt; m++) { tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); /* compute sums of absolute values on diagonal tile m */ MORSE_TASK_dzasum( &options, MorseRowwise, uplo, tempkm, tempkm, A(m, m), ldam, VECNORMS_STEP1(m, m)); /* * MorseLower */ if (uplo == MorseLower) { //for(n = A->myrank % A->q; n < m; n+=A->q) { for(n = 0; n < m; n++) { tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; /* compute sums of absolute values on rows of tile m */ MORSE_TASK_dzasum( &options, MorseRowwise, MorseUpperLower, tempkm, tempkn, A(m, n), ldam, VECNORMS_STEP1(m, n)); /* same operation on the symmetric part */ MORSE_TASK_dzasum( &options, MorseColumnwise, MorseUpperLower, tempkm, tempkn, A(m, n), ldam, VECNORMS_STEP1(n, m)); } } /* * MorseUpper */ else { // for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); // n < A->mt; n+=A->q) { for(n = m+1; n < A->mt; n++) { tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; /* compute sums of absolute values on rows of tile m */ MORSE_TASK_dzasum( &options, MorseRowwise, MorseUpperLower, tempkm, tempkn, A(m, n), ldam, VECNORMS_STEP1(m, n)); /* same operation on the symmetric part */ MORSE_TASK_dzasum( &options, MorseColumnwise, MorseUpperLower, tempkm, tempkn, A(m, n), ldam, VECNORMS_STEP1(n, m)); } } } /* compute vector sum between tiles in rows */ for(m = 0; m < A->mt; m++) { tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; MORSE_TASK_dlaset( &options, MorseUpperLower, tempkm, 1, 0., 0., VECNORMS_STEP2(m, 0), 1); for(n = 0; n < A->nt; n++) { MORSE_TASK_dgeadd( &options, MorseNoTrans, tempkm, 1, A->mb, 1.0, VECNORMS_STEP1(m, n), tempkm, 1.0, VECNORMS_STEP2(m, 0), tempkm); } /* * Compute max norm of each segment of the final vector in the * previous workspace */ MORSE_TASK_dlange( &options, MorseMaxNorm, tempkm, 1, A->nb, VECNORMS_STEP2(m, 0), tempkm, VECNORMS_STEP1(m, 0)); } /* Initialize RESULT array */ MORSE_TASK_dlaset( &options, MorseUpperLower, 1, 1, 0., 0., RESULT(0,0), 1); /* compute max norm between tiles in the column */ if (A->myrank % A->q == 0) { for(m = 0; m < A->mt; m++) { MORSE_TASK_dlange_max( &options, VECNORMS_STEP1(m, 0), RESULT(0,0)); } } /* Scatter norm over processus */ for(m = 0; m < A->p; m++) { for(n = 0; n < A->q; n++) { MORSE_TASK_dlacpy( &options, MorseUpperLower, 1, 1, 1, RESULT(0,0), 1, VECNORMS_STEP1(m, n), 1 ); } } MORSE_Desc_Flush( VECNORMS_STEP2, sequence ); MORSE_Desc_Flush( VECNORMS_STEP1, sequence ); MORSE_Desc_Flush( RESULT, sequence ); RUNTIME_sequence_wait(morse, sequence); *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q ); MORSE_Desc_Destroy( &(VECNORMS_STEP1) ); MORSE_Desc_Destroy( &(VECNORMS_STEP2) ); MORSE_Desc_Destroy( &(RESULT) ); break; /* * MorseFrobeniusNorm */ case MorseFrobeniusNorm: workm = chameleon_max( A->mt, A->p ); workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 2, 2, workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q); MORSE_Desc_Create(&(RESULT), NULL, MorseRealDouble, 1, 2, 2, 1, 2, 0, 0, 1, 2, 1, 1); /* Compute local norm to each tile */ for(m = (A->myrank / A->q); m < A->mt; m+=A->p) { tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); /* Zeroes my intermediate vector */ MORSE_TASK_dlaset( &options, MorseUpperLower, 1, 2, 1., 0., VECNORMS_STEP1(m, m), 1); /* compute norm on diagonal tile m */ MORSE_TASK_zsyssq( &options, uplo, tempkm, A(m, m), ldam, VECNORMS_STEP1(m, m)); /* * MorseLower */ if (uplo == MorseLower) { //for(n = A->myrank % A->q; n < m; n+=A->q) { for(n = 0; n < m; n++) { tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; /* Zeroes my intermediate vector */ MORSE_TASK_dlaset( &options, MorseUpperLower, 1, 2, 1., 0., VECNORMS_STEP1(m, n), 1); /* compute norm on the lower part */ MORSE_TASK_zgessq( &options, tempkm, tempkn, A(m, n), ldam, VECNORMS_STEP1(m, n)); /* same operation on the symmetric part */ MORSE_TASK_zgessq( &options, tempkm, tempkn, A(m, n), ldam, VECNORMS_STEP1(m, n)); } } /* * MorseUpper */ else { // for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); // n < A->mt; n+=A->q) { for(n = m+1; n < A->mt; n++) { tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; /* Zeroes my intermediate vector */ MORSE_TASK_dlaset( &options, MorseUpperLower, 1, 2, 1., 0., VECNORMS_STEP1(m, n), 1); /* compute norm on the lower part */ MORSE_TASK_zgessq( &options, tempkm, tempkn, A(m, n), ldam, VECNORMS_STEP1(m, n)); /* same operation on the symmetric part */ MORSE_TASK_zgessq( &options, tempkm, tempkn, A(m, n), ldam, VECNORMS_STEP1(m, n)); } } } /* Zeroes my intermediate vector */ MORSE_TASK_dlaset( &options, MorseUpperLower, 1, 2, 1., 0., RESULT(0,0), 1); /* Compute accumulation of scl and ssq */ for(m = (A->myrank / A->q); m < A->mt; m+=A->p) { /* * MorseLower */ if (uplo == MorseLower) { //for(n = A->myrank % A->q; n < m; n+=A->q) { for(n = 0; n <= m; n++) { MORSE_TASK_dplssq( &options, VECNORMS_STEP1(m, n), RESULT(0,0)); } } /* * MorseUpper */ else { // for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); // n < A->mt; n+=A->q) { for(n = m; n < A->mt; n++) { MORSE_TASK_dplssq( &options, VECNORMS_STEP1(m, n), RESULT(0,0)); } } } /* Compute scl * sqrt(ssq) */ MORSE_TASK_dplssq2( &options, RESULT(0,0)); /* Copy max norm in tiles to dispatch on every nodes */ for(m = 0; m < A->p; m++) { for(n = 0; n < A->q; n++) { MORSE_TASK_dlacpy( &options, MorseUpperLower, 1, 1, 1, RESULT(0,0), 1, VECNORMS_STEP1(m, n), 1 ); } } MORSE_Desc_Flush( VECNORMS_STEP1, sequence ); MORSE_Desc_Flush( RESULT, sequence ); RUNTIME_sequence_wait(morse, sequence); *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q ); MORSE_Desc_Destroy( &(VECNORMS_STEP1) ); MORSE_Desc_Destroy( &(RESULT) ); break; /* * MorseMaxNorm */ case MorseMaxNorm: default: /* Init workspace handle for the call to zlange but unused */ RUNTIME_options_ws_alloc( &options, 1, 0 ); workm = chameleon_max( A->mt, A->p ); workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 1, 1, workm, workn, 0, 0, workm, workn, A->p, A->q); MORSE_Desc_Create(&(RESULT), NULL, MorseRealDouble, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1); /* Compute local maximum to each tile */ for(m = 0; m < A->mt; m++) { tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); MORSE_TASK_zlansy( &options, MorseMaxNorm, uplo, tempkm, A->nb, A(m, m), ldam, VECNORMS_STEP1(m, m)); /* * MorseLower */ if (uplo == MorseLower) { //for(n = A->myrank % A->q; n < m; n+=A->q) { for(n = 0; n < m; n++) { tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; MORSE_TASK_zlange( &options, MorseMaxNorm, tempkm, tempkn, A->nb, A(m, n), ldam, VECNORMS_STEP1(m, n)); } } /* * MorseUpper */ else { //for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); // n < A->mt; n+=A->q) { for(n = m+1; n < A->mt; n++) { tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; MORSE_TASK_zlange( &options, MorseMaxNorm, tempkm, tempkn, A->nb, A(m, n), ldam, VECNORMS_STEP1(m, n)); } } } /* Zeroes RESULT array */ MORSE_TASK_dlaset( &options, MorseUpperLower, 1, 1, 0., 0., RESULT(0,0), 1); /* Compute max norm between tiles */ for(m = 0; m < A->mt; m++) { /* * MorseLower */ if (uplo == MorseLower) { //for(n = A->myrank % A->q; n < m; n+=A->q) { for(n = 0; n <= m; n++) { MORSE_TASK_dlange_max( &options, VECNORMS_STEP1(m, n), RESULT(0,0)); } } /* * MorseUpper */ else { //for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); // n < A->mt; n+=A->q) { for(n = m; n < A->mt; n++) { MORSE_TASK_dlange_max( &options, VECNORMS_STEP1(m, n), RESULT(0,0)); } } } /* Copy max norm in tiles to dispatch on every nodes */ for(m = 0; m < A->p; m++) { for(n = 0; n < A->q; n++) { MORSE_TASK_dlacpy( &options, MorseUpperLower, 1, 1, 1, RESULT(0,0), 1, VECNORMS_STEP1(m, n), 1 ); } } MORSE_Desc_Flush( VECNORMS_STEP1, sequence ); MORSE_Desc_Flush( RESULT, sequence ); RUNTIME_sequence_wait(morse, sequence); *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q ); MORSE_Desc_Destroy( &(VECNORMS_STEP1) ); MORSE_Desc_Destroy( &(RESULT) ); } RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); }