Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b7615ff3 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Feature/lange

parent 3733c3d1
Branches
Tags
No related merge requests found
......@@ -116,9 +116,7 @@ set(ZSRC
pzgetrf_nopiv.c
pzlacpy.c
pzlange.c
pzlanhe.c
pzlansy.c
pzlantr.c
pzlaset2.c
pzlaset.c
pzlauum.c
......
......@@ -26,323 +26,411 @@
//WS_ADD : A->mb + A->nb
#include "control/common.h"
#define A(m, n) A, m, n
#define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n
#define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n
#define RESULT(m, n) RESULT, m, n
/**
*
*/
void chameleon_pzlange( cham_normtype_t norm, CHAM_desc_t *A, double *result,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
#define A(m, n) A, (m), (n)
#define Wcol(m, n) Wcol, (m), (n)
#define Welt(m, n) Welt, (m), (n)
static inline void
chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
CHAM_desc_t *Wcol, CHAM_desc_t *Welt,
RUNTIME_option_t *options)
{
CHAM_desc_t *VECNORMS_STEP1 = NULL;
CHAM_desc_t *VECNORMS_STEP2 = NULL;
CHAM_desc_t *RESULT = NULL;
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
int workm, workn;
int tempkm, tempkn;
int ldam;
int m, n;
int minMNT = chameleon_min( A->mt, A->nt );
int minMN = chameleon_min( A->m, A->n );
int MT = (uplo == ChamUpper) ? minMNT : A->mt;
int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* Step 1:
* For j in [1,P], W(i, n) = reduce( A(i+k*P, n) )
*/
for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrasm(
options,
ChamColumnwise, uplo, diag, tempmm, tempnn,
A(m, n), ldam, Wcol(m, n) );
}
else {
INSERT_TASK_dzasum(
options,
ChamColumnwise, ChamUpperLower, tempmm, tempnn,
A(m, n), ldam, Wcol(m, n) );
}
chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS)
return;
RUNTIME_options_init(&options, chamctxt, sequence, request);
if ( m >= P ) {
INSERT_TASK_dgeadd(
options,
ChamNoTrans, tempnn, 1, A->nb,
1.0, Wcol(m, n), tempnn,
1.0, Wcol(m%P, n), tempnn );
}
}
*result = 0.0;
switch ( norm ) {
/*
* ChamOneNorm
/**
* Step 2:
* For each i, W(i, n) = reduce( W(0..P-1, n) )
*/
case ChamOneNorm:
/* Init workspace handle for the call to zlange but unused */
RUNTIME_options_ws_alloc( &options, 1, 0 );
workm = chameleon_max( A->mt, A->p );
workn = A->n;
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, A->nb, A->nb,
workm, workn, 0, 0, workm, workn, A->p, A->q);
/*
 * Fold the partial column sums held in rows 1..P-1 of Wcol into row 0.
 * The loop must advance m (the row being folded); n is the index of the
 * enclosing column loop and must stay fixed here.
 */
for(m = 1; m < P; m++) {
    INSERT_TASK_dgeadd(
        options,
        ChamNoTrans, tempnn, 1, A->nb,
        1.0, Wcol(m, n), tempnn,
        1.0, Wcol(0, n), tempnn );
}
CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, 1, A->nb, A->nb,
1, workn, 0, 0, 1, workn, A->p, A->q);
INSERT_TASK_dlange(
options,
ChamMaxNorm, tempnn, 1, A->nb,
Wcol(0, n), tempnn, Welt(0, n));
}
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
/**
* Step 3:
* For n in 0..Q-1, W(m, n) = max( W(m, n..nt[Q] ) )
*/
for(n = Q; n < NT; n++) {
INSERT_TASK_dlange_max(
options,
Welt(0, n), Welt(0, n%Q) );
}
for(n = A->myrank % A->q; n < A->nt; n+=A->q) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/**
* Step 4:
* For each i, Welt(i, n) = max( Welt(0..P-1, n) )
*/
for(n = 1; n < Q; n++) {
INSERT_TASK_dlange_max(
options,
Welt(0, n), Welt(0, 0) );
}
}
/* Zeroes my intermediate vectors */
for(m = (A->myrank / A->q); m < workm; m+=A->p) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, tempkn,
0., 0.,
VECNORMS_STEP1(m, n), 1);
static inline void
chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
CHAM_desc_t *Wcol, CHAM_desc_t *Welt,
RUNTIME_option_t *options)
{
int m, n;
int minMNT = chameleon_min( A->mt, A->nt );
int minMN = chameleon_min( A->m, A->n );
int MT = (uplo == ChamUpper) ? minMNT : A->mt;
int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* Step 1:
* For j in [1,Q], Wcol(m, j) = reduce( A(m, j+k*Q) )
*/
for(m = 0; m < MT; m++) {
int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrasm(
options,
ChamRowwise, uplo, diag, tempmm, tempnn,
A(m, n), ldam, Wcol(m, n) );
}
/* compute sums of absolute values on columns of each tile */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
else {
INSERT_TASK_dzasum(
&options,
ChamColumnwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
options,
ChamRowwise, ChamUpperLower, tempmm, tempnn,
A(m, n), ldam, Wcol(m, n) );
}
/* Zeroes the second intermediate vector */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, tempkn,
0., 0.,
VECNORMS_STEP2(0, n), 1);
/* Compute vector sums between tiles in columns */
for(m = 0; m < A->mt; m++) {
if ( n >= Q ) {
INSERT_TASK_dgeadd(
&options,
ChamNoTrans, 1, tempkn, A->mb,
1.0, VECNORMS_STEP1(m, n), 1,
1.0, VECNORMS_STEP2(0, n), 1);
options,
ChamNoTrans, tempmm, 1, A->mb,
1.0, Wcol(m, n ), tempmm,
1.0, Wcol(m, n%Q), tempmm );
}
}
/*
* Compute max norm of each segment of the final vector in the
* previous workspace
*/
INSERT_TASK_dlange(
&options,
ChamMaxNorm, 1, tempkn, A->nb,
VECNORMS_STEP2(0, n), 1,
VECNORMS_STEP1(0, n));
/**
* Step 2:
* For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) )
*/
for(n = 1; n < Q; n++) {
INSERT_TASK_dgeadd(
options,
ChamNoTrans, tempmm, 1, A->mb,
1.0, Wcol(m, n), tempmm,
1.0, Wcol(m, 0), tempmm );
}
/* Initialize RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
INSERT_TASK_dlange(
options,
ChamMaxNorm, tempmm, 1, A->nb,
Wcol(m, 0), 1, Welt(m, 0));
}
/**
* Step 3:
* For m in 0..P-1, Welt(m, n) = max( Wcol(m..mt[P], n ) )
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dlange_max(
options,
Welt(m, 0), Welt(m%P, 0) );
}
/**
* Step 4:
* For each i, Welt(i, n) = max( Welt(0..P-1, n) )
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dlange_max(
options,
Welt(m, 0), Welt(0, 0) );
}
}
static inline void
chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_desc_t *Welt,
RUNTIME_option_t *options)
{
int m, n;
int minMNT = chameleon_min( A->mt, A->nt );
int minMN = chameleon_min( A->m, A->n );
int MT = (uplo == ChamUpper) ? minMNT : A->mt;
int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* Step 1:
* For j in [1,Q], Welt(m, j) = reduce( A(m, j+k*Q) )
*/
for(m = 0; m < MT; m++) {
int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_zlantr(
options,
ChamMaxNorm, uplo, diag, tempmm, tempnn, A->nb,
A(m, n), ldam, Welt(m, n));
}
else {
INSERT_TASK_zlange(
options,
ChamMaxNorm, tempmm, tempnn, A->nb,
A(m, n), ldam, Welt(m, n));
}
/* Compute max norm between tiles in the row */
if (A->myrank < A->q) {
for(n = 0; n < A->nt; n++) {
if ( n >= Q ) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(0, n),
RESULT(0,0));
options,
Welt(m, n), Welt(m, n%Q) );
}
}
/* Scatter norm over processus */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
/**
* Step 2:
* For each j, W(m, j) = reduce( Welt(m, 0..Q-1) )
*/
for(n = 1; n < Q; n++) {
INSERT_TASK_dlange_max(
options,
Welt(m, n), Welt(m, 0) );
}
}
/**
* Step 3:
* For m in 0..P-1, Welt(m, n) = max( Welt(m..mt[P], n ) )
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dlange_max(
options,
Welt(m, 0), Welt(m%P, 0) );
}
/**
* Step 4:
* For each i, Welt(i, n) = max( Welt(0..P-1, n) )
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dlange_max(
options,
Welt(m, 0), Welt(0, 0) );
}
}
static inline void
chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_desc_t *Welt,
RUNTIME_option_t *options)
{
int m, n;
int minMNT = chameleon_min( A->mt, A->nt );
int minMN = chameleon_min( A->m, A->n );
int MT = (uplo == ChamUpper) ? minMNT : A->mt;
int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* Step 1:
* For j in [1,Q], Welt(m, j) = reduce( A(m, j+k*Q) )
*/
for(m = 0; m < MT; m++) {
int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrssq(
options,
uplo, diag, tempmm, tempnn,
A(m, n), ldam, Welt(m, n) );
}
else {
INSERT_TASK_zgessq(
options,
tempmm, tempnn,
A(m, n), ldam, Welt(m, n) );
}
if ( n >= Q ) {
INSERT_TASK_dplssq(
options, Welt(m, n), Welt(m, n%Q) );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
break;
/*
* ChamInfNorm
/**
* Step 2:
* For each j, W(m, j) = reduce( Welt(m, 0..Q-1) )
*/
case ChamInfNorm:
/* Init workspace handle for the call to zlange */
RUNTIME_options_ws_alloc( &options, A->mb, 0 );
for(n = 1; n < Q; n++) {
INSERT_TASK_dplssq(
options, Welt(m, n), Welt(m, 0) );
}
}
workm = A->m;
workn = chameleon_max( A->nt, A->q );
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, A->mb, 1, A->mb,
workm, workn, 0, 0, workm, workn, A->p, A->q);
/**
* Step 3:
* For m in 0..P-1, Welt(m, n) = max( Welt(m..mt[P], n ) )
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dplssq(
options, Welt(m, 0), Welt(m%P, 0) );
}
CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, A->mb, 1, A->mb,
workm, 1, 0, 0, workm, 1, A->p, A->q);
/**
* Step 4:
* For each i, Welt(i, n) = max( Welt(0..P-1, n) )
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dplssq(
options, Welt(m, 0), Welt(0, 0) );
}
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
A->p, A->q, 0, 0, A->p, A->q, A->p, A->q);
INSERT_TASK_dplssq2(
options, Welt(0, 0) );
}
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/**
*
*/
void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
CHAM_desc_t *A, double *result,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
CHAM_desc_t *Wcol = NULL;
CHAM_desc_t *Welt = NULL;
double alpha = 0.0;
double beta = 0.0;
/* Zeroes my intermediate vectors */
for(n = A->myrank % A->q; n < workn; n+=A->q) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP1(m, n), 1);
}
int workn, workmt, worknt;
int m, n;
/* compute sums of absolute values on rows of each tile */
for(n = A->myrank % A->q; n < A->nt; n+=A->q) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_dzasum(
&options,
ChamRowwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
}
chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS)
return;
RUNTIME_options_init(&options, chamctxt, sequence, request);
/* Zeroes the second intermediate vector */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP2(m, 0), 1);
*result = 0.0;
/* compute vector sums between tiles in rows locally on each rank */
for(n = A->myrank % A->q + A->q; n < A->nt; n+=A->q) {
INSERT_TASK_dgeadd(
&options,
ChamNoTrans, tempkm, 1, A->mb,
1.0, VECNORMS_STEP1(m, n), tempkm,
1.0, VECNORMS_STEP1(m, A->myrank % A->q), tempkm);
}
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workn = chameleon_max( A->n, A->q );
/* compute vector sums between tiles in rows between ranks */
for(n = 0; n < A->q; n++) {
INSERT_TASK_dgeadd(
&options,
ChamNoTrans, tempkm, 1, A->mb,
1.0, VECNORMS_STEP1(m, n), tempkm,
1.0, VECNORMS_STEP2(m, 0), tempkm);
}
switch ( norm ) {
case ChamOneNorm:
RUNTIME_options_ws_alloc( &options, 1, 0 );
/*
* Compute max norm of each segment of the final vector in the
* previous workspace
*/
INSERT_TASK_dlange(
&options,
ChamMaxNorm, tempkm, 1, A->nb,
VECNORMS_STEP2(m, 0), tempkm,
VECNORMS_STEP1(m, 0));
}
CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, 1, A->nb, A->nb,
workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q );
/* Initialize RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(A->myrank / A->q, A->myrank % A->q), 1);
CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1,
A->p, worknt, 0, 0, A->p, worknt, A->p, A->q );
/* compute max norm between tiles in the column locally on each rank */
if (A->myrank % A->q == 0) {
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, 0),
RESULT(A->myrank / A->q, A->myrank % A->q));
}
}
break;
/* compute max norm between tiles in the column between ranks */
if (A->myrank % A->q == 0) {
for(m = 0; m < A->p; m++) {
INSERT_TASK_dlange_max(
&options,
RESULT(m,0),
RESULT(0,0));
}
}
/*
* ChamInfNorm
*/
case ChamInfNorm:
RUNTIME_options_ws_alloc( &options, A->mb, 0 );
/* Scatter norm over processus */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, A->mb, 1, A->mb,
workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q );
CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1,
workmt, A->q, 0, 0, workmt, A->q, A->p, A->q );
break;
/*
* ChamFrobeniusNorm
*/
case ChamFrobeniusNorm:
RUNTIME_options_ws_alloc( &options, 1, 0 );
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 2, 2,
workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2,
1, 2, 0, 0, 1, 2, 1, 1);
/* Compute local norm to each tile */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
for(n = 0; n < A->nt; n++) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m,n), 1);
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/* Initialize arrays */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
RESULT(0,0), 1);
/* Compute accumulation of scl and ssq */
for(m = 0; m < A->mt; m++) {
for(n = 0; n < A->nt; n++) {
INSERT_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
/* Compute scl * sqrt(ssq) */
INSERT_TASK_dplssq2(
&options,
RESULT(0,0));
/* Copy max norm in tiles to dispatch on every nodes */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
alpha = 1.;
CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 2, 1, 2,
workmt*2, workn, 0, 0, workmt*2, workn, A->p, A->q );
break;
/*
......@@ -350,68 +438,81 @@ void chameleon_pzlange( cham_normtype_t norm, CHAM_desc_t *A, double *result,
*/
case ChamMaxNorm:
default:
/* Init workspace handle for the call to zlange but unused */
RUNTIME_options_ws_alloc( &options, 1, 0 );
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 1, 1,
workm, workn, 0, 0, workm, workn, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1,
workmt, workn, 0, 0, workmt, workn, A->p, A->q );
}
/* Compute local maximum to each tile */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
for(n = 0; n < A->nt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zlange(
/* Initialize workspaces */
if ( (norm == ChamInfNorm) ||
(norm == ChamOneNorm) )
{
/* Initialize Wcol tile */
for(m = 0; m < Wcol->mt; m++) {
for(n = 0; n < Wcol->nt; n++) {
INSERT_TASK_dlaset(
&options,
ChamMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
ChamUpperLower, Wcol->mb, Wcol->nb,
alpha, beta,
Wcol(m,n), Wcol->mb );
}
}
}
for(m = 0; m < Welt->mt; m++) {
for(n = 0; n < Welt->nt; n++) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, Welt->mb, Welt->nb,
alpha, beta,
Welt(m,n), Welt->mb );
}
}
/* Initialize RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
switch ( norm ) {
case ChamOneNorm:
chameleon_pzlange_one( uplo, diag, A, Wcol, Welt, &options );
CHAMELEON_Desc_Flush( Wcol, sequence );
break;
/* Compute max norm between tiles */
for(m = 0; m < A->mt; m++) {
for(n = 0; n < A->nt; n++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
case ChamInfNorm:
chameleon_pzlange_inf( uplo, diag, A, Wcol, Welt, &options );
CHAMELEON_Desc_Flush( Wcol, sequence );
break;
/* Copy max norm in tiles to dispatch on every nodes */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
case ChamFrobeniusNorm:
chameleon_pzlange_frb( uplo, diag, A, Welt, &options );
break;
case ChamMaxNorm:
default:
chameleon_pzlange_max( uplo, diag, A, Welt, &options );
}
/**
* Broadcast the result
*/
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
/* Copy to every grid position except the source (0,0): each rank later reads its own Welt(m, n) */
if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
Welt(0,0), 1, Welt(m, n), 1);
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
}
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
CHAMELEON_Desc_Flush( Welt, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
*result = *(double *)Welt->get_blkaddr(Welt, A->myrank / A->q, A->myrank % A->q );
if ( Wcol != NULL ) {
CHAMELEON_Desc_Destroy( &Wcol );
}
CHAMELEON_Desc_Destroy( &Welt );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
CHAMELEON_Desc_Destroy( &(RESULT) );
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, chamctxt);
}
/**
*
* @file pzlanhe.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zlanhe parallel algorithm
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 1.0.0
* @author Emmanuel Agullo
* @author Mathieu Faverge
* @date 2010-11-15
* @precisions normal z -> c
*
*/
//ALLOC_WS : A->mb
//#include <stdlib.h>
//#include <math.h>
//WS_ADD : A->mb
#include "control/common.h"
#define A(m, n) A, m, n
#define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n
#define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n
#define RESULT(m, n) RESULT, m, n
/**
*
*/
/**
*
*/
/**
 * Inserts the tasks that compute a norm of the tiled matrix A, visiting only
 * the diagonal tiles and the off-diagonal tiles on the side selected by uplo,
 * then waits for the sequence and stores the value in *result.
 * (Presumably A is Hermitian, as the zlanhe name suggests - confirm with
 * the callers.)
 *
 * @param[in] norm
 *          ChamOneNorm and ChamInfNorm share one code path,
 *          ChamFrobeniusNorm has its own, ChamMaxNorm and any other value
 *          take the default path.
 * @param[in] uplo
 *          ChamLower visits tiles (m, n) with n <= m; any other value is
 *          handled by the "upper" branches, which visit n >= m.
 * @param[in] A
 *          Descriptor of the matrix. Its mt/nt/m/n/mb/nb/p/q/myrank fields
 *          drive the loops and the workspace shapes.
 * @param[out] result
 *          Set to 0.0 once the sequence status check has passed, then
 *          overwritten with the entry of VECNORMS_STEP1 located at
 *          (myrank / q, myrank % q).
 * @param[in] sequence
 *          If its status is not CHAMELEON_SUCCESS on entry, the function
 *          returns immediately and *result is left untouched.
 * @param[in] request
 *          Forwarded to RUNTIME_options_init.
 */
void chameleon_pzlanhe(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
{
CHAM_desc_t *VECNORMS_STEP1 = NULL;
CHAM_desc_t *VECNORMS_STEP2 = NULL;
CHAM_desc_t *RESULT = NULL;
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
int workm, workn;
int tempkm, tempkn;
int ldam;
int m, n;
/* int part_p, part_q; */
/* part_p = A->myrank / A->q; */
/* part_q = A->myrank % A->q; */
chamctxt = chameleon_context_self();
/* Early exit happens before *result is written */
if (sequence->status != CHAMELEON_SUCCESS)
return;
RUNTIME_options_init(&options, chamctxt, sequence, request);
*result = 0.0;
switch ( norm ) {
/*
* ChamOneNorm / ChamInfNorm
*/
case ChamOneNorm:
case ChamInfNorm:
/* Init workspace handle for the call to zlanhe */
RUNTIME_options_ws_alloc( &options, A->mb, 0 );
/*
 * VECNORMS_STEP1: (A->mb x 1) tiles, one column of tiles per tile column
 * of A (at least A->q). VECNORMS_STEP2: a single column of such tiles.
 * RESULT: one scalar on a 1x1 grid.
 */
workm = A->m;
workn = chameleon_max( A->nt, A->q );
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, A->mb, 1, A->mb,
workm, workn, 0, 0, workm, workn, A->p, A->q);
CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, A->mb, 1, A->mb,
workm, 1, 0, 0, workm, 1, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
/* Zeroes my intermediate vectors */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
for(n = 0; n < workn; n++) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP1(m, n), 1);
}
}
/* This loop only visits the tile rows m = myrank / q, stepping by p */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/* compute sums of absolute values on diagonal tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, uplo, tempkm, tempkm,
A(m, m), ldam, VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute sums of absolute values on rows of tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
/* same tile, summed column-wise and stored at the mirrored position (n, m) */
INSERT_TASK_dzasum(
&options,
ChamColumnwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(n, m));
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
/* NOTE(review): the bound is A->mt while tempkn is derived from A->nt - presumably mt == nt here; confirm */
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute sums of absolute values on rows of tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
/* same tile, summed column-wise and stored at the mirrored position (n, m) */
INSERT_TASK_dzasum(
&options,
ChamColumnwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(n, m));
}
}
}
/* compute vector sum between tiles in rows */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP2(m, 0), 1);
for(n = 0; n < A->nt; n++) {
INSERT_TASK_dgeadd(
&options,
ChamNoTrans, tempkm, 1, A->mb,
1.0, VECNORMS_STEP1(m, n), tempkm,
1.0, VECNORMS_STEP2(m, 0), tempkm);
}
/*
* Compute max norm of each segment of the final vector in the
* previous workspace
*/
INSERT_TASK_dlange(
&options,
ChamMaxNorm, tempkm, 1, A->nb,
VECNORMS_STEP2(m, 0), tempkm,
VECNORMS_STEP1(m, 0));
}
/* Initialize RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
/* compute max norm between tiles in the column (inserted only when myrank % q == 0) */
if (A->myrank % A->q == 0) {
for(m = 0; m < A->mt; m++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, 0),
RESULT(0,0));
}
}
/* Scatter the norm to one VECNORMS_STEP1 tile per position of the p x q grid */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
/* Read the copy stored at this rank's own grid position */
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
CHAMELEON_Desc_Destroy( &(RESULT) );
break;
/*
* ChamFrobeniusNorm
*/
case ChamFrobeniusNorm:
/*
 * NOTE(review): unlike the other cases, this branch does not call
 * RUNTIME_options_ws_alloc, yet RUNTIME_options_ws_free is called
 * unconditionally at the end of the function - confirm this is safe.
 */
/* Each tile of A gets a (1 x 2) workspace tile, initialised below with dlaset( 1., 0. ) */
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 2, 2,
workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2,
1, 2, 0, 0, 1, 2, 1, 1);
/* Compute local norm to each tile */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/* Initialize my intermediate (1 x 2) tiles */
for(n = A->myrank % A->q; n < workn; n+=A->q) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m,n), 1);
}
/* compute norm on diagonal tile m */
INSERT_TASK_zhessq(
&options,
uplo, tempkm,
A(m, m), ldam,
VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute norm on the lower part */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
/*
 * Second, identical call on the same tile and the same workspace entry.
 * Presumably zgessq accumulates, so this accounts for the mirrored
 * tile - TODO confirm.
 */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute norm on the upper part */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
/* second, identical call - see the note in the ChamLower branch */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
}
/* Initialize arrays */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
RESULT(0,0), 1);
/* Compute accumulation of scl and ssq */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
/* n <= m: the diagonal entry is included in the accumulation */
for(n = 0; n <= m; n++) {
INSERT_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
/* starts at n = m: the diagonal entry is included in the accumulation */
for(n = m; n < A->mt; n++) {
INSERT_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
}
/* Compute scl * sqrt(ssq) */
INSERT_TASK_dplssq2(
&options,
RESULT(0,0));
/* Copy the norm into one tile per grid position to dispatch it on every node */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
/* Read the copy stored at this rank's own grid position */
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
CHAMELEON_Desc_Destroy( &(RESULT) );
break;
/*
* ChamMaxNorm
*/
case ChamMaxNorm:
default:
/* Init workspace handle for the call to zlange but unused */
RUNTIME_options_ws_alloc( &options, 1, 0 );
/* One scalar workspace entry per tile of A (at least p x q entries) */
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 1, 1,
workm, workn, 0, 0, workm, workn, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
/* Compute local maximum to each tile */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/* diagonal tile: kernel that takes uplo */
INSERT_TASK_zlanhe(
&options,
ChamMaxNorm, uplo, tempkm, A->nb,
A(m, m), ldam,
VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zlange(
&options,
ChamMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/*
* ChamUpper
*/
else {
//for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zlange(
&options,
ChamMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
}
/* Initialize RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
/* Compute max norm between tiles */
for(m = 0; m < A->mt; m++) {
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n <= m; n++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
/*
* ChamUpper
*/
else {
//for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m; n < A->mt; n++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
}
/* Copy the max norm into one tile per grid position to dispatch it on every node */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
/* Read the copy stored at this rank's own grid position */
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
CHAMELEON_Desc_Destroy( &(RESULT) );
}
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, chamctxt);
}
......@@ -21,445 +21,415 @@
*
*/
//ALLOC_WS : A->mb
//WS_ADD : A->mb
#include <stdlib.h>
#include <math.h>
//WS_ADD : A->mb
#include "control/common.h"
#define A(m, n) A, m, n
#define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n
#define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n
#define RESULT(m, n) RESULT, m, n
/**
*
*/
/**
*
*/
void chameleon_pzlansy(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
{
CHAM_desc_t *VECNORMS_STEP1 = NULL;
CHAM_desc_t *VECNORMS_STEP2 = NULL;
CHAM_desc_t *RESULT = NULL;
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
#define A(m, n) A, (m), (n)
#define Wcol(m, n) Wcol, (m), (n)
#define Welt(m, n) Welt, (m), (n)
int workm, workn;
int tempkm, tempkn;
int ldam;
static inline void
chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
CHAM_desc_t *Wcol, CHAM_desc_t *Welt,
RUNTIME_option_t *options)
{
int m, n;
/* int part_p, part_q; */
/* part_p = A->myrank / A->q; */
/* part_q = A->myrank % A->q; */
int MT = A->mt;
int NT = A->nt;
int M = A->m;
int N = A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* Step 1:
* For j in [1,Q], Wcol(m, j) = reduce( A(m, j+k*Q) )
*/
for(m = 0; m < MT; m++) {
int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS)
return;
RUNTIME_options_init(&options, chamctxt, sequence, request);
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
*result = 0.0;
switch ( norm ) {
/*
* ChamOneNorm / ChamInfNorm
*/
case ChamOneNorm:
case ChamInfNorm:
/* Init workspace handle for the call to zlange */
RUNTIME_options_ws_alloc( &options, A->mb, 0 );
for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
workm = A->m;
workn = chameleon_max( A->nt, A->q );
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, A->mb, 1, A->mb,
workm, workn, 0, 0, workm, workn, A->p, A->q);
if ( n == m ) {
INSERT_TASK_dzasum(
options,
ChamRowwise, uplo, tempmm, tempnn,
A(m, n), ldam, Wcol(m, n) );
}
else {
INSERT_TASK_dzasum(
options,
ChamRowwise, ChamUpperLower, tempmm, tempnn,
A(m, n), ldam, Wcol(m, n) );
INSERT_TASK_dzasum(
options,
ChamColumnwise, ChamUpperLower, tempmm, tempnn,
A(m, n), ldam, Wcol(n, m) );
}
}
}
CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, A->mb, 1, A->mb,
workm, 1, 0, 0, workm, 1, A->p, A->q);
for(m = 0; m < MT; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
for(n = Q; n < NT; n++) {
INSERT_TASK_dgeadd(
options,
ChamNoTrans, tempmm, 1, A->nb,
1.0, Wcol(m, n ), tempmm,
1.0, Wcol(m, n%Q), tempmm );
}
/* Zeroes my intermediate vectors */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
for(n = 0; n < workn; n++) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP1(m, n), 1);
}
/**
* Step 2:
* For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) )
*/
for(n = 1; n < Q; n++) {
INSERT_TASK_dgeadd(
options,
ChamNoTrans, tempmm, 1, A->mb,
1.0, Wcol(m, n), tempmm,
1.0, Wcol(m, 0), tempmm );
}
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
INSERT_TASK_dlange(
options,
ChamMaxNorm, tempmm, 1, A->nb,
Wcol(m, 0), 1, Welt(m, 0));
}
/* compute sums of absolute values on diagonal tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, uplo, tempkm, tempkm,
A(m, m), ldam, VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute sums of absolute values on rows of tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
/* same operation on the symmetric part */
INSERT_TASK_dzasum(
&options,
ChamColumnwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(n, m));
/**
* Step 3:
* For m in 0..P-1, Welt(m, n) = max( Wcol(m..mt[P], n ) )
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dlange_max(
options,
Welt(m, 0), Welt(m%P, 0) );
}
/**
* Step 4:
* For each i, Welt(i, n) = max( Welt(0..P-1, n) )
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dlange_max(
options,
Welt(m, 0), Welt(0, 0) );
}
}
static inline void
chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
CHAM_desc_t *Welt, RUNTIME_option_t *options)
{
int m, n;
int MT = A->mt;
int NT = A->nt;
int M = A->m;
int N = A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* Step 1:
* For j in [1,Q], Welt(m, j) = reduce( A(m, j+k*Q) )
*/
for(m = 0; m < MT; m++) {
int nmin = (uplo == ChamUpper ) ? m : 0;
int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
if ( n == m ) {
if ( trans == ChamConjTrans) {
INSERT_TASK_zlanhe(
options,
ChamMaxNorm, uplo, tempmm, A->nb,
A(m, n), ldam, Welt(m, n));
}
else {
INSERT_TASK_zlansy(
options,
ChamMaxNorm, uplo, tempmm, A->nb,
A(m, n), ldam, Welt(m, n));
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute sums of absolute values on rows of tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
/* same operation on the symmetric part */
INSERT_TASK_dzasum(
&options,
ChamColumnwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(n, m));
}
INSERT_TASK_zlange(
options,
ChamMaxNorm, tempmm, tempnn, A->nb,
A(m, n), ldam, Welt(m, n));
}
}
/* compute vector sum between tiles in rows */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP2(m, 0), 1);
for(n = 0; n < A->nt; n++) {
INSERT_TASK_dgeadd(
&options,
ChamNoTrans, tempkm, 1, A->mb,
1.0, VECNORMS_STEP1(m, n), tempkm,
1.0, VECNORMS_STEP2(m, 0), tempkm);
}
/*
* Compute max norm of each segment of the final vector in the
* previous workspace
*/
INSERT_TASK_dlange(
&options,
ChamMaxNorm, tempkm, 1, A->nb,
VECNORMS_STEP2(m, 0), tempkm,
VECNORMS_STEP1(m, 0));
}
/* Initialize RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
/* compute max norm between tiles in the column */
if (A->myrank % A->q == 0) {
for(m = 0; m < A->mt; m++) {
if ( n >= Q ) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, 0),
RESULT(0,0));
options,
Welt(m, n), Welt(m, n%Q) );
}
}
/* Scatter norm over processus */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
/**
* Step 2:
* For each j, W(m, j) = reduce( Welt(m, 0..Q-1) )
*/
for(n = 1; n < Q; n++) {
INSERT_TASK_dlange_max(
options,
Welt(m, n), Welt(m, 0) );
}
CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
break;
/*
* ChamFrobeniusNorm
}
/**
* Step 3:
* For m in 0..P-1, Welt(m, n) = max( Welt(m..mt[P], n ) )
*/
case ChamFrobeniusNorm:
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
for(m = P; m < MT; m++) {
INSERT_TASK_dlange_max(
options,
Welt(m, 0), Welt(m%P, 0) );
}
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 2, 2,
workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2,
1, 2, 0, 0, 1, 2, 1, 1);
/**
* Step 4:
* For each i, Welt(i, n) = max( Welt(0..P-1, n) )
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dlange_max(
options,
Welt(m, 0), Welt(0, 0) );
}
}
/* Compute local norm to each tile */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
static inline void
chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
CHAM_desc_t *A, CHAM_desc_t *Welt,
RUNTIME_option_t *options)
{
int m, n;
int MT = A->mt;
int NT = A->nt;
int M = A->m;
int N = A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* Step 1:
* For j in [1,Q], Welt(m, j) = reduce( A(m, j+k*Q) )
*/
for(m = 0; m < MT; m++) {
int nmin = (uplo == ChamUpper ) ? m : 0;
int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
/* Zeroes my intermediate vector */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m, m), 1);
/* compute norm on diagonal tile m */
INSERT_TASK_zsyssq(
&options,
uplo, tempkm,
A(m, m), ldam,
VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* Zeroes my intermediate vector */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m, n), 1);
/* compute norm on the lower part */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
/* same operation on the symmetric part */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* Zeroes my intermediate vector */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m, n), 1);
/* compute norm on the lower part */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
/* same operation on the symmetric part */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
}
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
/* Zeroes my intermediate vector */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
RESULT(0,0), 1);
/* Compute accumulation of scl and ssq */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n <= m; n++) {
INSERT_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
if ( n == m ) {
if ( trans == ChamConjTrans) {
INSERT_TASK_zhessq(
options, uplo, tempmm,
A(m, n), ldam, Welt(m, n) );
}
else {
INSERT_TASK_zsyssq(
options, uplo, tempmm,
A(m, n), ldam, Welt(m, n) );
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m; n < A->mt; n++) {
INSERT_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
INSERT_TASK_zgessq(
options, tempmm, tempnn,
A(m, n), ldam, Welt(m, n) );
INSERT_TASK_zgessq(
options, tempmm, tempnn,
A(m, n), ldam, Welt(n, m) );
}
}
}
/* Compute scl * sqrt(ssq) */
INSERT_TASK_dplssq2(
&options,
RESULT(0,0));
for(m = 0; m < MT; m++) {
for(n = Q; n < NT; n++) {
INSERT_TASK_dplssq(
options, Welt(m, n), Welt(m, n%Q) );
}
/* Copy max norm in tiles to dispatch on every nodes */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
/**
* Step 2:
* For each j, W(m, j) = reduce( Welt(m, 0..Q-1) )
*/
for(n = 1; n < Q; n++) {
INSERT_TASK_dplssq(
options, Welt(m, n), Welt(m, 0) );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
break;
/**
* Step 3:
* For m in 0..P-1, Welt(m, 0) = accumulated scl/ssq of Welt(m + k*P, 0), k >= 1
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dplssq(
options, Welt(m, 0), Welt(m%P, 0) );
}
/*
* ChamMaxNorm
/**
* Step 4:
* Welt(0, 0) = accumulated scl/ssq of Welt(0..P-1, 0)
*/
case ChamMaxNorm:
default:
/* Init workspace handle for the call to zlange but unused */
for(m = 1; m < P; m++) {
INSERT_TASK_dplssq(
options, Welt(m, 0), Welt(0, 0) );
}
INSERT_TASK_dplssq2(
options, Welt(0, 0) );
}
/**
*
*/
void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_trans_t trans,
CHAM_desc_t *A, double *result,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
CHAM_desc_t *Wcol = NULL;
CHAM_desc_t *Welt = NULL;
double alpha = 0.0;
double beta = 0.0;
int workn, workmt, worknt;
int m, n;
chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS)
return;
RUNTIME_options_init(&options, chamctxt, sequence, request);
*result = 0.0;
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workn = chameleon_max( A->n, A->q );
switch ( norm ) {
case ChamOneNorm:
case ChamInfNorm:
RUNTIME_options_ws_alloc( &options, 1, 0 );
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, 1, A->nb, A->nb,
workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q );
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 1, 1,
workm, workn, 0, 0, workm, workn, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1,
A->p, worknt, 0, 0, A->p, worknt, A->p, A->q );
/* Compute local maximum to each tile */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
break;
INSERT_TASK_zlansy(
&options,
ChamMaxNorm, uplo, tempkm, A->nb,
A(m, m), ldam,
VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zlange(
&options,
ChamMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/*
* ChamUpper
*/
else {
//for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zlange(
&options,
ChamMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
}
/*
* ChamFrobeniusNorm
*/
case ChamFrobeniusNorm:
RUNTIME_options_ws_alloc( &options, 1, 0 );
/* Zeroes RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
/* Compute max norm between tiles */
for(m = 0; m < A->mt; m++) {
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n <= m; n++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
/*
* ChamUpper
*/
else {
//for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m; n < A->mt; n++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
alpha = 1.;
CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 2, 1, 2,
workmt*2, workn, 0, 0, workmt*2, workn, A->p, A->q );
break;
/*
* ChamMaxNorm
*/
case ChamMaxNorm:
default:
RUNTIME_options_ws_alloc( &options, 1, 0 );
CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1,
workmt, workn, 0, 0, workmt, workn, A->p, A->q );
}
/* Initialize workspaces */
if ( (norm == ChamInfNorm) ||
(norm == ChamOneNorm) )
{
/* Initialize Wcol tile */
for(m = 0; m < Wcol->mt; m++) {
for(n = 0; n < Wcol->nt; n++) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, Wcol->mb, Wcol->nb,
alpha, beta,
Wcol(m,n), Wcol->mb );
}
}
}
for(m = 0; m < Welt->mt; m++) {
for(n = 0; n < Welt->nt; n++) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, Welt->mb, Welt->nb,
alpha, beta,
Welt(m,n), Welt->mb );
}
}
switch ( norm ) {
case ChamOneNorm:
case ChamInfNorm:
chameleon_pzlansy_inf( uplo, A, Wcol, Welt, &options );
CHAMELEON_Desc_Flush( Wcol, sequence );
break;
case ChamFrobeniusNorm:
chameleon_pzlansy_frb( trans, uplo, A, Welt, &options );
break;
case ChamMaxNorm:
default:
chameleon_pzlansy_max( trans, uplo, A, Welt, &options );
}
/* Copy max norm in tiles to dispatch on every nodes */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
/**
* Broadcast the result
*/
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
if ( (m != 0) && (n != 0) ) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
Welt(0,0), 1, Welt(m, n), 1);
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
}
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
CHAMELEON_Desc_Flush( Welt, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
*result = *(double *)Welt->get_blkaddr(Welt, A->myrank / A->q, A->myrank % A->q );
if ( Wcol != NULL ) {
CHAMELEON_Desc_Destroy( &Wcol );
}
CHAMELEON_Desc_Destroy( &Welt );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
CHAMELEON_Desc_Destroy( &(RESULT) );
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, chamctxt);
}
/**
*
* @file pzlantr.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zlantr parallel algorithm
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 1.0.0
* @author Mathieu Faverge
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include <stdlib.h>
#include <math.h>
#include "control/common.h"
#define A(m, n) A, m, n
#define W1(m, n) W1, m, n
#define W2(m, n) W2, m, n
#define RESULT(m, n) RESULT, m, n
/**
 * @brief Chameleon zlantr parallel algorithm: submits the tasks computing a
 *        norm of the triangular/trapezoidal tile matrix A and returns it.
 *
 * The function inserts tasks in the given sequence, then blocks on
 * RUNTIME_sequence_wait() before reading the final value back, so it is
 * synchronous from the caller's point of view.
 *
 * Overall scheme (identical for every norm):
 *   1. per-tile kernels write partial results into the workspace W1
 *      (and W2 for the one/infinity norms);
 *   2. the partial results are reduced into the single-tile descriptor RESULT;
 *   3. RESULT is copied into the tiles W1(0..p-1, 0..q-1) and every process
 *      reads the tile W1(myrank / q, myrank % q).
 *
 * @param[in]  norm     Norm selector. ChamOneNorm, ChamInfNorm and
 *                      ChamFrobeniusNorm have dedicated branches; ChamMaxNorm
 *                      and any other value take the max-norm branch.
 * @param[in]  uplo     ChamUpper or ChamLower. In the one/infinity branches
 *                      anything but ChamUpper is handled as lower; in the
 *                      Frobenius/max branches anything but ChamLower is
 *                      handled as upper.
 * @param[in]  diag     Forwarded unchanged to the diagonal-tile kernels
 *                      (ztrasm, ztrssq, zlantr).
 * @param[in]  A        Descriptor of the matrix; only the tiles of the
 *                      selected triangle are read.
 * @param[out] result   Set to 0.0 first, then overwritten with the computed
 *                      value. Left untouched when the sequence is already in
 *                      error.
 * @param[in]  sequence Sequence the tasks are inserted in. If its status is
 *                      not CHAMELEON_SUCCESS the function returns immediately.
 * @param[in]  request  Forwarded to RUNTIME_options_init().
 */
void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                       CHAM_desc_t *A, double *result,
                       RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
{
    CHAM_desc_t *W1 = NULL;      /* per-tile partial results, also used for the final scatter */
    CHAM_desc_t *W2 = NULL;      /* row/column vector of accumulated sums (one/inf norms only) */
    CHAM_desc_t *RESULT = NULL;  /* single tile holding the reduced value */
    CHAM_context_t *chamctxt;
    RUNTIME_option_t options;

    int workm, workn;
    int tempkm, tempkn;
    int ldam, ldan;
    int m, n, minMNT;

    /* Number of diagonal tiles of the trapezoid */
    minMNT = chameleon_min( A->mt, A->nt );

    chamctxt = chameleon_context_self();
    if (sequence->status != CHAMELEON_SUCCESS)
        return;
    RUNTIME_options_init(&options, chamctxt, sequence, request);

    *result = 0.0;
    switch ( norm ) {
    /*
     *  ChamOneNorm
     */
    case ChamOneNorm:
        /* Init workspace handle for the call to zlange but unused */
        RUNTIME_options_ws_alloc( &options, 1, 0 );

        /*
         * W1 holds one row vector per tile row, W2 a single row vector.
         * For a lower trapezoid only the first min(m, n) columns are stored,
         * so W1/W2 may have fewer tile columns than A.
         */
        workm = chameleon_max( A->mt, A->p );
        workn = ( uplo == ChamLower ) ? chameleon_min( A->m, A->n ) : A->n;
        CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, A->nb, A->nb,
                              workm, workn, 0, 0, workm, workn, A->p, A->q);
        CHAMELEON_Desc_Create(&(W2), NULL, ChamRealDouble, 1, A->nb, A->nb,
                              1, workn, 0, 0, 1, workn, A->p, A->q);
        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
                              1, 1, 0, 0, 1, 1, 1, 1);

        /*
         *  ChamUpper
         */
        if (uplo == ChamUpper) {
            /* Zeroes intermediate vector */
            for(n = 0; n < W2->nt; n++) {
                tempkn = n == W2->nt-1 ? W2->n-n*W2->nb : W2->nb;

                INSERT_TASK_dlaset(
                    &options,
                    ChamUpperLower, 1, tempkn,
                    0., 0.,
                    W2(0, n), 1);
            }

            for(m = 0; m < minMNT; m++) {
                /* Zeroes intermediate vectors */
                for(n = m; n < W1->nt; n++) {
                    tempkn = n == W1->nt-1 ? W1->n-n*W1->nb : W1->nb;

                    INSERT_TASK_dlaset(
                        &options,
                        ChamUpperLower, 1, tempkn,
                        0., 0.,
                        W1(m, n), 1);
                }

                tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
                ldam = BLKLDD(A, m);

                /* compute sums of absolute values on columns of diag tile */
                INSERT_TASK_ztrasm(
                    &options,
                    ChamColumnwise, uplo, diag, tempkm, tempkn,
                    A(m, m), ldam,
                    W1(m, m));

                /* compute sums of absolute values on columns of each tile */
                for(n = m+1; n < A->nt; n++) {
                    tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;

                    INSERT_TASK_dzasum(
                        &options,
                        ChamColumnwise, ChamUpperLower, tempkm, tempkn,
                        A(m, n), ldam, W1(m, n));
                }

                /* Compute vector sums between tiles in columns */
                for(n = m; n < W1->nt; n++) {
                    tempkn = n == W1->nt-1 ? W1->n-n*W1->nb : W1->nb;

                    INSERT_TASK_dgeadd(
                        &options,
                        ChamNoTrans, 1, tempkn, W1->mb,
                        1.0, W1(m, n), 1,
                        1.0, W2(0, n), 1);
                }
            }
        }
        /*
         *  ChamLower
         */
        else {
            for(n = 0; n < minMNT; n++) {
                tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                ldan = BLKLDD(A, n);

                /* Zeroes intermediate vectors */
                for(m = n; m < A->mt; m++) {
                    INSERT_TASK_dlaset(
                        &options,
                        ChamUpperLower, 1, tempkn,
                        0., 0.,
                        W1(m, n), 1);
                }

                /* Zeroes the second intermediate vector */
                INSERT_TASK_dlaset(
                    &options,
                    ChamUpperLower, 1, tempkn,
                    0., 0.,
                    W2(0, n), 1);

                /* compute sums of absolute values on columns of diag tile */
                INSERT_TASK_ztrasm(
                    &options,
                    ChamColumnwise, uplo, diag, tempkm, tempkn,
                    A(n, n), ldan,
                    W1(n, n));

                /* compute sums of absolute values on columns of each tile */
                for(m = n+1; m < A->mt; m++) {
                    tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                    ldam = BLKLDD(A, m);

                    INSERT_TASK_dzasum(
                        &options,
                        ChamColumnwise, ChamUpperLower, tempkm, tempkn,
                        A(m, n), ldam, W1(m, n));
                }

                /* Compute vector sums between tiles in columns */
                for(m = n; m < A->mt; m++) {
                    INSERT_TASK_dgeadd(
                        &options,
                        ChamNoTrans, 1, tempkn, A->mb,
                        1.0, W1(m, n), 1,
                        1.0, W2(0, n), 1);
                }
            }
        }

        /*
         * Compute max norm of each segment of the final vector in the
         * previous workspace.
         *
         * The loop is bounded by the workspace (W2->nt), not by A->nt: for a
         * lower trapezoid with n > m the workspace has fewer tile columns
         * than A, and iterating up to A->nt would address tiles that do not
         * exist in W1/W2. This mirrors the infinity-norm branch, which is
         * bounded by W1->mt.
         */
        for(n = 0; n < W2->nt; n++) {
            tempkn = n == W2->nt-1 ? W2->n-n*W2->nb : W2->nb;

            INSERT_TASK_dlange(
                &options,
                ChamMaxNorm, 1, tempkn, A->nb,
                W2(0, n), 1,
                W1(0, n));
        }

        /* Initialize RESULT array */
        INSERT_TASK_dlaset(
            &options,
            ChamUpperLower, 1, 1,
            0., 0.,
            RESULT(0,0), 1);

        /* Compute max norm between tiles in the row */
        if (A->myrank < A->q) {
            for(n = 0; n < W2->nt; n++) {
                INSERT_TASK_dlange_max(
                    &options,
                    W1(0, n),
                    RESULT(0,0));
            }
        }

        /* Scatter norm over processes */
        for(m = 0; m < A->p; m++) {
            for(n = 0; n < A->q; n++) {
                INSERT_TASK_dlacpy(
                    &options,
                    ChamUpperLower, 1, 1, 1,
                    RESULT(0,0), 1,
                    W1(m, n), 1 );
            }
        }
        CHAMELEON_Desc_Flush( W2, sequence );
        CHAMELEON_Desc_Flush( W1, sequence );
        CHAMELEON_Desc_Flush( RESULT, sequence );
        RUNTIME_sequence_wait(chamctxt, sequence);
        /* Each process reads the copy stored in its own W1 tile */
        *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q );
        CHAMELEON_Desc_Destroy( &(W1) );
        CHAMELEON_Desc_Destroy( &(W2) );
        CHAMELEON_Desc_Destroy( &(RESULT) );
        break;

    /*
     *  ChamInfNorm
     */
    case ChamInfNorm:
        /* Init workspace handle for the call to zlange */
        RUNTIME_options_ws_alloc( &options, A->mb, 0 );

        /*
         * W1 holds one column vector per tile column, W2 a single column
         * vector. For an upper trapezoid only the first min(m, n) rows are
         * stored.
         */
        workm = ( uplo == ChamUpper ) ? chameleon_min( A->m, A->n ) : A->m;
        workn = chameleon_max( A->nt, A->q );
        CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, A->mb, 1, A->mb,
                              workm, workn, 0, 0, workm, workn, A->p, A->q);
        CHAMELEON_Desc_Create(&(W2), NULL, ChamRealDouble, A->mb, 1, A->mb,
                              workm, 1, 0, 0, workm, 1, A->p, A->q);
        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
                              1, 1, 0, 0, 1, 1, 1, 1);

        /*
         *  ChamUpper
         */
        if (uplo == ChamUpper) {
            for(m = 0; m < minMNT; m++) {
                tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
                ldam = BLKLDD(A, m);

                /* Zeroes intermediate vectors */
                for(n = m; n < A->nt; n++) {
                    INSERT_TASK_dlaset(
                        &options,
                        ChamUpperLower, tempkm, 1,
                        0., 0.,
                        W1(m, n), 1);
                }

                /* Zeroes intermediate vector */
                INSERT_TASK_dlaset(
                    &options,
                    ChamUpperLower, tempkm, 1,
                    0., 0.,
                    W2(m, 0), 1);

                /* compute sums of absolute values on rows of diag tile */
                INSERT_TASK_ztrasm(
                    &options,
                    ChamRowwise, uplo, diag, tempkm, tempkn,
                    A(m, m), ldam,
                    W1(m, m));

                /* compute sums of absolute values on rows of each tile */
                for(n = m+1; n < A->nt; n++) {
                    tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;

                    INSERT_TASK_dzasum(
                        &options,
                        ChamRowwise, ChamUpperLower, tempkm, tempkn,
                        A(m, n), ldam, W1(m, n));
                }

                /* Compute vector sums between tiles in rows */
                for(n = m; n < A->nt; n++) {
                    INSERT_TASK_dgeadd(
                        &options,
                        ChamNoTrans, tempkm, 1, A->mb,
                        1.0, W1(m, n), tempkm,
                        1.0, W2(m, 0), tempkm);
                }
            }
        }
        /*
         *  ChamLower
         */
        else {
            /* Zeroes intermediate vector */
            for(m = 0; m < A->mt; m++) {
                tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;

                INSERT_TASK_dlaset(
                    &options,
                    ChamUpperLower, tempkm, 1,
                    0., 0.,
                    W2(m, 0), 1);
            }

            for(n = 0; n < minMNT; n++) {
                /* Zeroes intermediate vectors */
                for(m = n; m < A->mt; m++) {
                    tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;

                    INSERT_TASK_dlaset(
                        &options,
                        ChamUpperLower, tempkm, 1,
                        0., 0.,
                        W1(m, n), tempkm);
                }

                tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                ldan = BLKLDD(A, n);

                /* compute sums of absolute values on rows of diag tile */
                INSERT_TASK_ztrasm(
                    &options,
                    ChamRowwise, uplo, diag, tempkm, tempkn,
                    A(n, n), ldan,
                    W1(n, n));

                /* compute sums of absolute values on rows of each tile */
                for(m = n+1; m < A->mt; m++) {
                    tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                    ldam = BLKLDD(A, m);

                    INSERT_TASK_dzasum(
                        &options,
                        ChamRowwise, ChamUpperLower, tempkm, tempkn,
                        A(m, n), ldam, W1(m, n));
                }

                /* Compute vector sums between tiles in rows */
                for(m = n; m < A->mt; m++) {
                    tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;

                    INSERT_TASK_dgeadd(
                        &options,
                        ChamNoTrans, tempkm, 1, A->mb,
                        1.0, W1(m, n), tempkm,
                        1.0, W2(m, 0), tempkm);
                }
            }
        }

        /*
         * Compute max norm of each segment of the final vector in the
         * previous workspace
         */
        for(m = 0; m < W1->mt; m++) {
            tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;

            INSERT_TASK_dlange(
                &options,
                ChamMaxNorm, tempkm, 1, A->nb,
                W2(m, 0), 1,
                W1(m, 0));
        }

        /* Initialize RESULT array */
        INSERT_TASK_dlaset(
            &options,
            ChamUpperLower, 1, 1,
            0., 0.,
            RESULT(0,0), 1);

        /* compute max norm between tiles in the column */
        if (A->myrank % A->q == 0) {
            for(m = 0; m < W1->mt; m++) {
                INSERT_TASK_dlange_max(
                    &options,
                    W1(m, 0),
                    RESULT(0,0));
            }
        }

        /* Scatter norm over processes */
        for(m = 0; m < A->p; m++) {
            for(n = 0; n < A->q; n++) {
                INSERT_TASK_dlacpy(
                    &options,
                    ChamUpperLower, 1, 1, 1,
                    RESULT(0,0), 1,
                    W1(m, n), 1 );
            }
        }
        CHAMELEON_Desc_Flush( W2, sequence );
        CHAMELEON_Desc_Flush( W1, sequence );
        CHAMELEON_Desc_Flush( RESULT, sequence );
        RUNTIME_sequence_wait(chamctxt, sequence);
        /* Each process reads the copy stored in its own W1 tile */
        *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q );
        CHAMELEON_Desc_Destroy( &(W1) );
        CHAMELEON_Desc_Destroy( &(W2) );
        CHAMELEON_Desc_Destroy( &(RESULT) );
        break;

    /*
     *  ChamFrobeniusNorm
     */
    case ChamFrobeniusNorm:
        /* One (scl, ssq) pair per tile of A, plus one pair for the result */
        workm = chameleon_max( A->mt, A->p );
        workn = chameleon_max( A->nt, A->q );

        CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, 2, 2,
                              workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2,
                              1, 2, 0, 0, 1, 2, 1, 1);

        /*
         *  ChamLower
         */
        if (uplo == ChamLower) {
            /* Compute local maximum to each tile */
            for(n = 0; n < minMNT; n++) {
                tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                ldan = BLKLDD(A, n);

                /* Zeroes my intermediate vectors */
                for(m = n; m < A->mt; m++) {
                    INSERT_TASK_dlaset(
                        &options,
                        ChamUpperLower, 1, 2,
                        1., 0.,
                        W1(m,n), 1);
                }

                /* Compute local norm of the diagonal tile */
                INSERT_TASK_ztrssq(
                    &options,
                    uplo, diag, tempkm, tempkn,
                    A(n, n), ldan,
                    W1(n, n));

                /* Compute local norm to each tile */
                for(m = n+1; m < A->mt; m++) {
                    tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                    ldam = BLKLDD(A, m);

                    INSERT_TASK_zgessq(
                        &options,
                        tempkm, tempkn,
                        A(m, n), ldam,
                        W1(m, n));
                }
            }
        }
        /*
         *  ChamUpper
         */
        else {
            /* Compute local maximum to each tile */
            for(m = 0; m < minMNT; m++) {
                tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
                ldam = BLKLDD(A, m);

                /* Zeroes my intermediate vectors */
                for(n = m; n < A->nt; n++) {
                    INSERT_TASK_dlaset(
                        &options,
                        ChamUpperLower, 1, 2,
                        1., 0.,
                        W1(m,n), 1);
                }

                /* Compute local norm of the diagonal tile */
                INSERT_TASK_ztrssq(
                    &options,
                    uplo, diag, tempkm, tempkn,
                    A(m, m), ldam,
                    W1(m, m));

                /* Compute local norm to each tile */
                for(n = m+1; n < A->nt; n++) {
                    tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;

                    INSERT_TASK_zgessq(
                        &options,
                        tempkm, tempkn,
                        A(m, n), ldam,
                        W1(m, n));
                }
            }
        }

        /* Initialize arrays */
        INSERT_TASK_dlaset(
            &options,
            ChamUpperLower, 1, 2,
            1., 0.,
            RESULT(0,0), 1);

        /*
         *  ChamLower
         */
        if (uplo == ChamLower) {
            /* Compute accumulation of scl and ssq */
            for(n = 0; n < minMNT; n++) {
                for(m = n; m < A->mt; m++) {
                    INSERT_TASK_dplssq(
                        &options,
                        W1(m, n),
                        RESULT(0,0));
                }
            }
        }
        /*
         *  ChamUpper
         */
        else {
            /* Compute accumulation of scl and ssq */
            for(m = 0; m < minMNT; m++) {
                for(n = m; n < A->nt; n++) {
                    INSERT_TASK_dplssq(
                        &options,
                        W1(m, n),
                        RESULT(0,0));
                }
            }
        }

        /* Compute scl * sqrt(ssq) */
        INSERT_TASK_dplssq2(
            &options,
            RESULT(0,0));

        /* Copy the norm in tiles to dispatch it on every node */
        for(m = 0; m < A->p; m++) {
            for(n = 0; n < A->q; n++) {
                INSERT_TASK_dlacpy(
                    &options,
                    ChamUpperLower, 1, 1, 1,
                    RESULT(0,0), 1,
                    W1(m, n), 1 );
            }
        }

        CHAMELEON_Desc_Flush( W1, sequence );
        CHAMELEON_Desc_Flush( RESULT, sequence );
        RUNTIME_sequence_wait(chamctxt, sequence);
        /* Each process reads the copy stored in its own W1 tile */
        *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q );
        CHAMELEON_Desc_Destroy( &(W1) );
        CHAMELEON_Desc_Destroy( &(RESULT) );
        break;

    /*
     *  ChamMaxNorm
     */
    case ChamMaxNorm:
    default:
        /* Init workspace handle for the call to zlange but unused */
        RUNTIME_options_ws_alloc( &options, 1, 0 );

        /* One scalar per tile of A, plus one scalar for the result */
        workm = chameleon_max( A->mt, A->p );
        workn = chameleon_max( A->nt, A->q );

        CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, 1, 1,
                              workm, workn, 0, 0, workm, workn, A->p, A->q);
        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
                              1, 1, 0, 0, 1, 1, 1, 1);

        /*
         *  ChamLower
         */
        if (uplo == ChamLower) {
            /* Compute local maximum to each tile */
            for(n = 0; n < minMNT; n++) {
                tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                ldan = BLKLDD(A, n);

                INSERT_TASK_zlantr(
                    &options,
                    ChamMaxNorm, uplo, diag,
                    tempkm, tempkn, A->nb,
                    A(n, n), ldan,
                    W1(n, n));

                for(m = n+1; m < A->mt; m++) {
                    tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                    ldam = BLKLDD(A, m);

                    INSERT_TASK_zlange(
                        &options,
                        ChamMaxNorm, tempkm, tempkn, A->nb,
                        A(m, n), ldam,
                        W1(m, n));
                }
            }
        }
        /*
         *  ChamUpper
         */
        else {
            /* Compute local maximum to each tile */
            for(m = 0; m < minMNT; m++) {
                tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
                ldam = BLKLDD(A, m);

                INSERT_TASK_zlantr(
                    &options,
                    ChamMaxNorm, uplo, diag,
                    tempkm, tempkn, A->nb,
                    A(m, m), ldam,
                    W1(m, m));

                for(n = m+1; n < A->nt; n++) {
                    tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;

                    INSERT_TASK_zlange(
                        &options,
                        ChamMaxNorm, tempkm, tempkn, A->nb,
                        A(m, n), ldam,
                        W1(m, n));
                }
            }
        }

        /* Initialize RESULT array */
        INSERT_TASK_dlaset(
            &options,
            ChamUpperLower, 1, 1,
            0., 0.,
            RESULT(0,0), 1);

        /*
         *  ChamLower
         */
        if (uplo == ChamLower) {
            /* Compute max norm between tiles */
            for(n = 0; n < minMNT; n++) {
                for(m = n; m < A->mt; m++) {
                    INSERT_TASK_dlange_max(
                        &options,
                        W1(m, n),
                        RESULT(0,0));
                }
            }
        }
        /*
         *  ChamUpper
         */
        else {
            /* Compute max norm between tiles */
            for(m = 0; m < minMNT; m++) {
                for(n = m; n < A->nt; n++) {
                    INSERT_TASK_dlange_max(
                        &options,
                        W1(m, n),
                        RESULT(0,0));
                }
            }
        }

        /* Copy max norm in tiles to dispatch it on every node */
        for(m = 0; m < A->p; m++) {
            for(n = 0; n < A->q; n++) {
                INSERT_TASK_dlacpy(
                    &options,
                    ChamUpperLower, 1, 1, 1,
                    RESULT(0,0), 1,
                    W1(m, n), 1 );
            }
        }

        CHAMELEON_Desc_Flush( W1, sequence );
        CHAMELEON_Desc_Flush( RESULT, sequence );
        RUNTIME_sequence_wait(chamctxt, sequence);
        /* Each process reads the copy stored in its own W1 tile */
        *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q );
        CHAMELEON_Desc_Destroy( &(W1) );
        CHAMELEON_Desc_Destroy( &(RESULT) );
    }
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, chamctxt);
}
......@@ -287,7 +287,7 @@ int CHAMELEON_zlange_Tile_Async( cham_normtype_t norm, CHAM_desc_t *A, double *v
return CHAMELEON_SUCCESS;
}
chameleon_pzlange( norm, A, value, sequence, request );
chameleon_pzlange_generic( norm, ChamUpperLower, ChamNonUnit, A, value, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -295,7 +295,7 @@ int CHAMELEON_zlanhe_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, CHAM_de
return CHAMELEON_SUCCESS;
}
chameleon_pzlanhe( norm, uplo, A, value, sequence, request );
chameleon_pzlansy_generic( norm, uplo, ChamConjTrans, A, value, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -295,7 +295,7 @@ int CHAMELEON_zlansy_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, CHAM_de
return CHAMELEON_SUCCESS;
}
chameleon_pzlansy( norm, uplo, A, value, sequence, request );
chameleon_pzlansy_generic( norm, uplo, ChamTrans, A, value, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -323,7 +323,7 @@ int CHAMELEON_zlantr_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, cham_di
return CHAMELEON_SUCCESS;
}
chameleon_pzlantr( norm, uplo, diag, A, value, sequence, request );
chameleon_pzlange_generic( norm, uplo, diag, A, value, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -82,10 +82,11 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans, CHAMELEON_Complex64
void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *E, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlag2c(CHAM_desc_t *A, CHAM_desc_t *SB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlange(cham_normtype_t norm, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlanhe(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlansy(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_trans_t trans,
CHAM_desc_t *A, double *result,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlaset( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment