Attention une mise à jour du serveur va être effectuée le lundi 17 mai entre 13h et 13h30. Cette mise à jour va générer une interruption du service de quelques minutes.

Commit 5366cc02 authored by Mathieu Faverge

Merge branch 'feature/lange' into 'master'

Feature/lange

See merge request !121
parents 3733c3d1 b7615ff3
......@@ -116,9 +116,7 @@ set(ZSRC
pzgetrf_nopiv.c
pzlacpy.c
pzlange.c
pzlanhe.c
pzlansy.c
pzlantr.c
pzlaset2.c
pzlaset.c
pzlauum.c
......
This diff is collapsed.
/**
*
* @file pzlanhe.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zlanhe parallel algorithm
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 1.0.0
* @author Emmanuel Agullo
* @author Mathieu Faverge
* @date 2010-11-15
* @precisions normal z -> c
*
*/
//ALLOC_WS : A->mb
//#include <stdlib.h>
//#include <math.h>
//WS_ADD : A->mb
#include "control/common.h"
/*
 * Tile-argument shorthands: each macro expands to the identifier of the same
 * name followed by the tile coordinates m and n, i.e. three comma-separated
 * arguments. They are used in the argument lists of the INSERT_TASK_* calls.
 */
#define A(m, n) A, m, n
#define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n
#define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n
#define RESULT(m, n) RESULT, m, n
/**
 * Submit the tasks computing a norm of the tiled matrix A, reading only the
 * tiles of the triangle selected by uplo, then wait for the sequence and
 * store the value in *result.
 *
 * @param[in] norm
 *          ChamOneNorm and ChamInfNorm share one branch, ChamFrobeniusNorm
 *          has its own, ChamMaxNorm and any other value take the default
 *          branch.
 * @param[in] uplo
 *          ChamLower visits the tiles (m, n) with n <= m; any other value
 *          visits the tiles with n >= m.
 * @param[in] A
 *          Descriptor of the matrix. Column loops are bounded by A->mt.
 *          NOTE(review): presumably A is square in tiles (mt == nt) - confirm.
 * @param[out] result
 *          Set to 0.0 once the options are initialised, then overwritten
 *          with the value read from the workspace tile
 *          (A->myrank / A->q, A->myrank % A->q) after the sequence wait.
 * @param[in,out] sequence
 *          If its status is not CHAMELEON_SUCCESS on entry the function
 *          returns immediately and *result is left untouched.
 * @param[in,out] request
 *          Forwarded to RUNTIME_options_init.
 *
 * The call is blocking: every branch ends with RUNTIME_sequence_wait before
 * reading the result and destroying its temporary descriptors.
 *
 * NOTE(review): the return values of CHAMELEON_Desc_Create are not checked.
 */
void chameleon_pzlanhe(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
{
/* Temporary descriptors, created and destroyed inside each norm branch */
CHAM_desc_t *VECNORMS_STEP1 = NULL;
CHAM_desc_t *VECNORMS_STEP2 = NULL;
CHAM_desc_t *RESULT = NULL;
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
int workm, workn;
int tempkm, tempkn;
int ldam;
int m, n;
/* int part_p, part_q; */
/* part_p = A->myrank / A->q; */
/* part_q = A->myrank % A->q; */
chamctxt = chameleon_context_self();
/* Early exit happens before RUNTIME_options_init, so nothing has to be finalized */
if (sequence->status != CHAMELEON_SUCCESS)
return;
RUNTIME_options_init(&options, chamctxt, sequence, request);
*result = 0.0;
switch ( norm ) {
/*
* ChamOneNorm / ChamInfNorm
*/
case ChamOneNorm:
case ChamInfNorm:
/* Init workspace handle for the call to zlanhe */
RUNTIME_options_ws_alloc( &options, A->mb, 0 );
workm = A->m;
workn = chameleon_max( A->nt, A->q );
/* STEP1: A->mb x 1 tiles, one column of tiles per tile column of A (at least A->q) */
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, A->mb, 1, A->mb,
workm, workn, 0, 0, workm, workn, A->p, A->q);
/* STEP2: a single column of A->mb x 1 tiles receiving the per-row totals */
CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, A->mb, 1, A->mb,
workm, 1, 0, 0, workm, 1, A->p, A->q);
/* RESULT: one 1 x 1 tile created on a 1 x 1 process grid */
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
/* Zero every tile of the intermediate vectors (all m, all n < workn) */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
for(n = 0; n < workn; n++) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP1(m, n), 1);
}
}
/*
 * Only the tile rows m = A->myrank / A->q, stepping by A->p, are visited.
 * NOTE(review): presumably the tile rows owned by this process row in a
 * 2D block-cyclic distribution - confirm.
 */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/* compute sums of absolute values on diagonal tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, uplo, tempkm, tempkm,
A(m, m), ldam, VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute sums of absolute values on rows of tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
/*
 * Same tile summed column-wise and stored at the mirrored position
 * (n, m): this stands for the tile of the other triangle, which is
 * never read.
 */
INSERT_TASK_dzasum(
&options,
ChamColumnwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(n, m));
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute sums of absolute values on rows of tile m */
INSERT_TASK_dzasum(
&options,
ChamRowwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(m, n));
/* Column-wise sums of the same tile, stored at the mirrored position (n, m) */
INSERT_TASK_dzasum(
&options,
ChamColumnwise, ChamUpperLower, tempkm, tempkn,
A(m, n), ldam, VECNORMS_STEP1(n, m));
}
}
}
/* compute vector sum between tiles in rows */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
/* Reset the accumulator tile before adding the partial sums into it */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempkm, 1,
0., 0.,
VECNORMS_STEP2(m, 0), 1);
/* Accumulate every STEP1(m, n) of tile row m into STEP2(m, 0) */
for(n = 0; n < A->nt; n++) {
INSERT_TASK_dgeadd(
&options,
ChamNoTrans, tempkm, 1, A->mb,
1.0, VECNORMS_STEP1(m, n), tempkm,
1.0, VECNORMS_STEP2(m, 0), tempkm);
}
/*
* Compute max norm of each segment of the final vector in the
* previous workspace
*/
INSERT_TASK_dlange(
&options,
ChamMaxNorm, tempkm, 1, A->nb,
VECNORMS_STEP2(m, 0), tempkm,
VECNORMS_STEP1(m, 0));
}
/* Initialize RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
/* compute max norm between tiles in the column */
/* Only ranks with A->myrank % A->q == 0 submit this reduction */
if (A->myrank % A->q == 0) {
for(m = 0; m < A->mt; m++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, 0),
RESULT(0,0));
}
}
/* Copy the norm into tile (m, n) of STEP1 for every m < A->p, n < A->q */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
/* Block until the submitted tasks have completed, then read this rank's copy */
RUNTIME_sequence_wait(chamctxt, sequence);
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
CHAMELEON_Desc_Destroy( &(RESULT) );
break;
/*
* ChamFrobeniusNorm
*/
case ChamFrobeniusNorm:
/*
 * NOTE(review): this branch never calls RUNTIME_options_ws_alloc, yet
 * RUNTIME_options_ws_free is called unconditionally at the end of the
 * function - confirm this is harmless.
 */
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
/* STEP1: one 1 x 2 tile of doubles per tile of A; RESULT: a single 1 x 2 tile */
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 2, 2,
workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2,
1, 2, 0, 0, 1, 2, 1, 1);
/* Compute local norm to each tile */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/*
 * Initialise the 1 x 2 tiles of row m, but only for the columns
 * n = A->myrank % A->q, stepping by A->q.
 * NOTE(review): the zgessq loops below write to every n on the selected
 * side of the diagonal, not only these columns - confirm the other
 * tiles are initialised elsewhere.
 */
for(n = A->myrank % A->q; n < workn; n+=A->q) {
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
VECNORMS_STEP1(m,n), 1);
}
/* compute norm on diagonal tile m */
INSERT_TASK_zhessq(
&options,
uplo, tempkm,
A(m, m), ldam,
VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute norm on the lower part */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
/*
 * The identical task is submitted a second time on the same tile and
 * the same STEP1(m, n): the off-diagonal tile is accumulated twice,
 * standing for its mirrored counterpart.
 */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/* compute norm on the upper part */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
/* Second identical submission: accumulates the tile twice, as in the lower case */
INSERT_TASK_zgessq(
&options,
tempkm, tempkn,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
}
/* Initialize arrays */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 2,
1., 0.,
RESULT(0,0), 1);
/* Compute accumulation of scl and ssq */
for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
/* n <= m: the diagonal tile is included in the accumulation */
for(n = 0; n <= m; n++) {
INSERT_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
/*
* ChamUpper
*/
else {
// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
/* Starts at n = m: the diagonal tile is included in the accumulation */
for(n = m; n < A->mt; n++) {
INSERT_TASK_dplssq(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
}
/* Compute scl * sqrt(ssq) */
INSERT_TASK_dplssq2(
&options,
RESULT(0,0));
/* Copy the norm into tile (m, n) of STEP1 for every m < A->p, n < A->q */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
/* Block until the submitted tasks have completed, then read this rank's copy */
RUNTIME_sequence_wait(chamctxt, sequence);
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
CHAMELEON_Desc_Destroy( &(RESULT) );
break;
/*
* ChamMaxNorm
*/
case ChamMaxNorm:
default:
/* Init workspace handle for the call to zlange but unused */
RUNTIME_options_ws_alloc( &options, 1, 0 );
workm = chameleon_max( A->mt, A->p );
workn = chameleon_max( A->nt, A->q );
/* STEP1: one 1 x 1 tile per tile of A; RESULT: a single 1 x 1 tile */
CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 1, 1,
workm, workn, 0, 0, workm, workn, A->p, A->q);
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1);
/* Compute local maximum to each tile */
/* Unlike the other branches, every tile row is visited (no rank-based stride) */
for(m = 0; m < A->mt; m++) {
tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
/* Diagonal tile: handled by the zlanhe task with the caller's uplo */
INSERT_TASK_zlanhe(
&options,
ChamMaxNorm, uplo, tempkm, A->nb,
A(m, m), ldam,
VECNORMS_STEP1(m, m));
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
/* Off-diagonal tiles are each read once, with no mirrored task */
for(n = 0; n < m; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zlange(
&options,
ChamMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
/*
* ChamUpper
*/
else {
//for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
for(n = m+1; n < A->mt; n++) {
tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zlange(
&options,
ChamMaxNorm, tempkm, tempkn, A->nb,
A(m, n), ldam,
VECNORMS_STEP1(m, n));
}
}
}
/* Initialize RESULT array */
INSERT_TASK_dlaset(
&options,
ChamUpperLower, 1, 1,
0., 0.,
RESULT(0,0), 1);
/* Compute max norm between tiles */
for(m = 0; m < A->mt; m++) {
/*
* ChamLower
*/
if (uplo == ChamLower) {
//for(n = A->myrank % A->q; n < m; n+=A->q) {
/* n <= m: the diagonal tile is included in the reduction */
for(n = 0; n <= m; n++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
/*
* ChamUpper
*/
else {
//for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q );
// n < A->mt; n+=A->q) {
/* Starts at n = m: the diagonal tile is included in the reduction */
for(n = m; n < A->mt; n++) {
INSERT_TASK_dlange_max(
&options,
VECNORMS_STEP1(m, n),
RESULT(0,0));
}
}
}
/* Copy the norm into tile (m, n) of STEP1 for every m < A->p, n < A->q */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
RESULT(0,0), 1,
VECNORMS_STEP1(m, n), 1 );
}
}
CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
CHAMELEON_Desc_Flush( RESULT, sequence );
/* Block until the submitted tasks have completed, then read this rank's copy */
RUNTIME_sequence_wait(chamctxt, sequence);
*result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
CHAMELEON_Desc_Destroy( &(RESULT) );
}
/* Common epilogue for every norm branch */
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, chamctxt);
}
This diff is collapsed.
This diff is collapsed.
......@@ -287,7 +287,7 @@ int CHAMELEON_zlange_Tile_Async( cham_normtype_t norm, CHAM_desc_t *A, double *v
return CHAMELEON_SUCCESS;
}
chameleon_pzlange( norm, A, value, sequence, request );
chameleon_pzlange_generic( norm, ChamUpperLower, ChamNonUnit, A, value, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -295,7 +295,7 @@ int CHAMELEON_zlanhe_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, CHAM_de
return CHAMELEON_SUCCESS;
}
chameleon_pzlanhe( norm, uplo, A, value, sequence, request );
chameleon_pzlansy_generic( norm, uplo, ChamConjTrans, A, value, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -295,7 +295,7 @@ int CHAMELEON_zlansy_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, CHAM_de
return CHAMELEON_SUCCESS;
}
chameleon_pzlansy( norm, uplo, A, value, sequence, request );
chameleon_pzlansy_generic( norm, uplo, ChamTrans, A, value, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -323,7 +323,7 @@ int CHAMELEON_zlantr_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, cham_di
return CHAMELEON_SUCCESS;
}
chameleon_pzlantr( norm, uplo, diag, A, value, sequence, request );
chameleon_pzlange_generic( norm, uplo, diag, A, value, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -82,10 +82,11 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans, CHAMELEON_Complex64
void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *E, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlag2c(CHAM_desc_t *A, CHAM_desc_t *SB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlange(cham_normtype_t norm, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlanhe(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlansy(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
/*
 * Norm routine taking uplo and diag in addition to the norm type.
 * NOTE(review): the Tile_Async hunks of this change call it as
 * (norm, ChamUpperLower, ChamNonUnit, ...) for zlange and as
 * (norm, uplo, diag, ...) for zlantr - presumably it replaces both
 * chameleon_pzlange and chameleon_pzlantr; confirm against its definition.
 */
void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
/*
 * Norm routine taking uplo and a trans selector in addition to the norm type.
 * NOTE(review): the Tile_Async hunks of this change call it with
 * ChamConjTrans for zlanhe and ChamTrans for zlansy - presumably trans
 * selects Hermitian versus symmetric handling; confirm against its definition.
 */
void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_trans_t trans,
CHAM_desc_t *A, double *result,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlaset( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment