-
Mathieu Faverge authoredMathieu Faverge authored
pzlansy.c 16.38 KiB
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file pzlansy.c
*
* MORSE auxiliary routines
* MORSE is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI,
* University of Bordeaux, Bordeaux INP
*
* @version 2.6.0
* @comment This file has been automatically generated
* from Plasma 2.6.0 for MORSE 1.0.0
* @author Emmanuel Agullo
* @author Mathieu Faverge
* @date 2010-11-15
* @precisions normal z -> c d s
*
**/
//ALLOC_WS : A->mb
#include <stdlib.h>
#include <math.h>
//WS_ADD : A->mb
#include "control/common.h"
#define A(m, n) A, m, n
#define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n
#define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n
#define RESULT(m, n) RESULT, m, n
/*******************************************************************************
*
**/
/*******************************************************************************
*
**/
void morse_pzlansy(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *result,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_desc_t *VECNORMS_STEP1 = NULL;
MORSE_desc_t *VECNORMS_STEP2 = NULL;
MORSE_desc_t *RESULT = NULL;
MORSE_context_t *morse;
MORSE_option_t options;
int workm, workn;
int tempkm, tempkn;
int ldam;
int m, n;
/* int part_p, part_q; */
/* part_p = A->myrank / A->q; */
/* part_q = A->myrank % A->q; */
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
return;
RUNTIME_options_init(&options, morse, sequence, request);
*result = 0.0;
switch ( norm ) {
/*
* MorseOneNorm / MorseInfNorm
*/
case MorseOneNorm:
case MorseInfNorm:
/* Init workspace handle for the call to zlange */
RUNTIME_options_ws_alloc( &options, A->mb, 0 );
workm = A->m;
workn = chameleon_max( A->nt, A->q );
MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, A->mb, 1, A->mb,
workm, workn, 0, 0, workm, workn, A->p, A->q);
MORSE_Desc_Create(&(VECNORMS_STEP2), NULL, MorseRealDouble, A->mb, 1, A->mb,
workm, 1, 0, 0, workm, 1, A->p, A->q);
MORSE_Desc_Create(&(RESULT), NULL, MorseRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
/* Zeroes my intermediate vectors */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
for(n = 0; n < workn; n++) {
MORSE_TASK_dlaset(
&options,
MorseUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP1(m, n), 1);
}
}
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/* compute sums of absolute values on diagonal tile m */
MORSE_TASK_dzasum(
&options,
MorseRowwise, uplo, tempkm, tempkm,
A(m, m), ldam, VECNORMS_STEP1(m, m));
/*
* MorseLower
*/
if (uplo == MorseLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute sums of absolute values on rows of tile m */
MORSE_TASK_dzasum(
&options,
MorseRowwise, MorseUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
/* same operation on the symmetric part */
MORSE_TASK_dzasum(
&options,
MorseColumnwise, MorseUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(n, m));
}
}
/*
* MorseUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute sums of absolute values on rows of tile m */
MORSE_TASK_dzasum(
&options,
MorseRowwise, MorseUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
/* same operation on the symmetric part */
MORSE_TASK_dzasum(
&options,
MorseColumnwise, MorseUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(n, m));
}
}
}
/* compute vector sum between tiles in rows */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
MORSE_TASK_dlaset(
&options,
MorseUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP2(m, 0), 1);
for(n = 0; n < A->nt; n++) {
MORSE_TASK_dgeadd(
&options,
MorseNoTrans, tempkm, 1, A->mb,
1.0, VECNORMS_STEP1(m, n), tempkm,
1.0, VECNORMS_STEP2(m, 0), tempkm);
}
/*
* Compute max norm of each segment of the final vector in the
* previous workspace
*/
MORSE_TASK_dlange(
&options,
MorseMaxNorm, tempkm, 1, A->nb,
VECNORMS_STEP2(m, 0), tempkm,
VECNORMS_STEP1(m, 0));
}
/* Initialize RESULT array */
MORSE_TASK_dlaset(
&options,
MorseUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
/* compute max norm between tiles in the column */
if (A->myrank % A->q == 0) {
for(m = 0; m < A->mt; m++) {
MORSE_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, 0),
RESULT(0,0));
}
}
/* Scatter norm over processus */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
MORSE_TASK_dlacpy(
&options,
MorseUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
MORSE_Desc_Flush( VECNORMS_STEP2, sequence );
MORSE_Desc_Flush( VECNORMS_STEP1, sequence );
MORSE_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(morse, sequence);
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
MORSE_Desc_Destroy( &(VECNORMS_STEP1) );
MORSE_Desc_Destroy( &(VECNORMS_STEP2) );
MORSE_Desc_Destroy( &(RESULT) );
break;
/*
* MorseFrobeniusNorm
*/
case MorseFrobeniusNorm:
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 2, 2,
workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
MORSE_Desc_Create(&(RESULT), NULL, MorseRealDouble, 1, 2, 2,
1, 2, 0, 0, 1, 2, 1, 1);
/* Compute local norm to each tile */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/* Zeroes my intermediate vector */
MORSE_TASK_dlaset(
&options,
MorseUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m, m), 1);
/* compute norm on diagonal tile m */
MORSE_TASK_zsyssq(
&options,
uplo, tempkm,
A(m, m), ldam,
VECNORMS_STEP1(m, m));
/*
* MorseLower
*/
if (uplo == MorseLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* Zeroes my intermediate vector */
MORSE_TASK_dlaset(
&options,
MorseUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m, n), 1);
/* compute norm on the lower part */
MORSE_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
/* same operation on the symmetric part */
MORSE_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/*
* MorseUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* Zeroes my intermediate vector */
MORSE_TASK_dlaset(
&options,
MorseUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m, n), 1);
/* compute norm on the lower part */
MORSE_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
/* same operation on the symmetric part */
MORSE_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
}
/* Zeroes my intermediate vector */
MORSE_TASK_dlaset(
&options,
MorseUpperLower, 1, 2,
1., 0.,
RESULT(0,0), 1);
/* Compute accumulation of scl and ssq */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
/*
* MorseLower
*/
if (uplo == MorseLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n <= m; n++) {
MORSE_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
/*
* MorseUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m; n < A->mt; n++) {
MORSE_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
}
/* Compute scl * sqrt(ssq) */
MORSE_TASK_dplssq2(
&options,
RESULT(0,0));
/* Copy max norm in tiles to dispatch on every nodes */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
MORSE_TASK_dlacpy(
&options,
MorseUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
MORSE_Desc_Flush( VECNORMS_STEP1, sequence );
MORSE_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(morse, sequence);
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
MORSE_Desc_Destroy( &(VECNORMS_STEP1) );
MORSE_Desc_Destroy( &(RESULT) );
break;
/*
* MorseMaxNorm
*/
case MorseMaxNorm:
default:
/* Init workspace handle for the call to zlange but unused */
RUNTIME_options_ws_alloc( &options, 1, 0 );
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 1, 1,
workm, workn, 0, 0, workm, workn, A->p, A->q);
MORSE_Desc_Create(&(RESULT), NULL, MorseRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
/* Compute local maximum to each tile */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
MORSE_TASK_zlansy(
&options,
MorseMaxNorm, uplo, tempkm, A->nb,
A(m, m), ldam,
VECNORMS_STEP1(m, m));
/*
* MorseLower
*/
if (uplo == MorseLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_zlange(
&options,
MorseMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/*
* MorseUpper
*/
else {
//for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_zlange(
&options,
MorseMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
}
/* Zeroes RESULT array */
MORSE_TASK_dlaset(
&options,
MorseUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
/* Compute max norm between tiles */
for(m = 0; m < A->mt; m++) {
/*
* MorseLower
*/
if (uplo == MorseLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n <= m; n++) {
MORSE_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
/*
* MorseUpper
*/
else {
//for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m; n < A->mt; n++) {
MORSE_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
}
/* Copy max norm in tiles to dispatch on every nodes */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
MORSE_TASK_dlacpy(
&options,
MorseUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
MORSE_Desc_Flush( VECNORMS_STEP1, sequence );
MORSE_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(morse, sequence);
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
MORSE_Desc_Destroy( &(VECNORMS_STEP1) );
MORSE_Desc_Destroy( &(RESULT) );
}
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
}