Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 89116d0a authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Fix auxiliary routines

parent 125faf10
No related branches found
No related tags found
No related merge requests found
......@@ -29,10 +29,10 @@
* Parallel initialization a 2-D array A to BETA on the diagonal and
* ALPHA on the offdiagonals.
*/
void chameleon_pzlaset(cham_uplo_t uplo,
CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
void chameleon_pzlaset( cham_uplo_t uplo,
CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
......@@ -71,18 +71,28 @@ void chameleon_pzlaset(cham_uplo_t uplo,
}
}
else if (uplo == ChamUpper) {
for (j = 0; j < A->nt; j++){
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
for (i = 0; i < chameleon_min(j+1, A->mt); i++){
tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb;
ldai = BLKLDD(A, i);
INSERT_TASK_zlaset(
&options,
ChamUpperLower, tempim, tempjn,
alpha, (i == j) ? beta : alpha,
A(i, j), ldai);
}
}
for (i = 0; i < A->mt; i++) {
tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb;
ldai = BLKLDD(A, i);
if ( i < A->nt ) {
j = i;
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
INSERT_TASK_zlaset(
&options,
uplo, tempim, tempjn,
alpha, beta, A(i, j), ldai);
}
for (j = i+1; j < A->nt; j++) {
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
INSERT_TASK_zlaset(
&options,
ChamUpperLower, tempim, tempjn,
alpha, alpha, A(i, j), ldai);
}
}
}
else {
for (i = 0; i < A->mt; i++){
......
......@@ -12,9 +12,6 @@
* @brief Chameleon zplghe parallel algorithm
*
* @version 0.9.2
* @comment This file is a copy from pzplghe.c
* wich has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
......@@ -31,13 +28,13 @@
* chameleon_pzplghe - Generate a random hermitian (positive definite if 'bump' is large enough) half-matrix by tiles.
*/
void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
unsigned long long int seed,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
unsigned long long int seed,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
int m, n;
int m, n, minmn;
int ldam;
int tempmm, tempnn;
......@@ -47,16 +44,15 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
}
RUNTIME_options_init(&options, chamctxt, sequence, request);
for (m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
minmn = chameleon_min( A->mt, A->nt );
switch ( uplo ) {
case ChamLower:
for (n = 0; n < minmn; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/*
* ChamLower
*/
if (uplo == ChamLower) {
for (n = 0; n <= m; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
for (m = n; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
options.priority = m + n;
INSERT_TASK_zplghe(
......@@ -65,10 +61,13 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
A->m, m*A->mb, n*A->nb, seed );
}
}
/*
* ChamUpper
*/
else if (uplo == ChamUpper) {
break;
case ChamUpper:
for (m = 0; m < minmn; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
for (n = m; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
......@@ -79,13 +78,17 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
A->m, m*A->mb, n*A->nb, seed );
}
}
/*
* ChamUpperLower
*/
else {
case ChamUpperLower:
default:
for (m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
for (n = 0; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
options.priority = m + n;
INSERT_TASK_zplghe(
&options,
bump, tempmm, tempnn, A(m, n), ldam,
......
......@@ -12,9 +12,6 @@
* @brief Chameleon zplgsy parallel algorithm
*
* @version 0.9.2
* @comment This file is a copy of pzplgsy.c,
wich has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
......@@ -31,13 +28,13 @@
* chameleon_pzplgsy - Generate a random symmetric (positive definite if 'bump' is large enough) half-matrix by tiles.
*/
void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A,
unsigned long long int seed,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
unsigned long long int seed,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
int m, n;
int m, n, minmn;
int ldam;
int tempmm, tempnn;
......@@ -47,16 +44,15 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
}
RUNTIME_options_init(&options, chamctxt, sequence, request);
for (m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
minmn = chameleon_min( A->mt, A->nt );
switch ( uplo ) {
case ChamLower:
for (n = 0; n < minmn; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
/*
* ChamLower
*/
if (uplo == ChamLower) {
for (n = 0; n <= m; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
for (m = n; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
options.priority = m + n;
INSERT_TASK_zplgsy(
......@@ -65,10 +61,13 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
A->m, m*A->mb, n*A->nb, seed );
}
}
/*
* ChamUpper
*/
else if (uplo == ChamUpper) {
break;
case ChamUpper:
for (m = 0; m < minmn; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
for (n = m; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
......@@ -79,13 +78,17 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
A->m, m*A->mb, n*A->nb, seed );
}
}
/*
* ChamUpperLower
*/
else {
case ChamUpperLower:
default:
for (m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
for (n = 0; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
options.priority = m + n;
INSERT_TASK_zplgsy(
&options,
bump, tempmm, tempnn, A(m, n), ldam,
......
......@@ -179,7 +179,7 @@ int CHAMELEON_zlascal( cham_uplo_t uplo, int M, int N,
*
*/
int CHAMELEON_zlascal_Tile( cham_uplo_t uplo,
CHAMELEON_Complex64_t alpha, CHAM_desc_t *A )
CHAMELEON_Complex64_t alpha, CHAM_desc_t *A )
{
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
......
......@@ -243,7 +243,7 @@ int CHAMELEON_zpotrf_Tile( cham_uplo_t uplo, CHAM_desc_t *A )
*
*/
int CHAMELEON_zpotrf_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
......
......@@ -21,6 +21,7 @@
*
*/
#include "coreblas.h"
#include "coreblas/lapacke.h"
/**
******************************************************************************
......@@ -112,12 +113,21 @@ int CORE_zgeadd(cham_trans_t trans, int M, int N,
return -8;
}
if ( beta == 0. ) {
LAPACKE_zlaset_work( LAPACK_COL_MAJOR, 'A',
M, N, 0., 0., B, LDB );
}
else if ( beta != 1. ) {
LAPACKE_zlascl_work( LAPACK_COL_MAJOR, 'G',
0, 0, 1., beta, M, N, B, LDB );
}
switch( trans ) {
#if defined(PRECISION_z) || defined(PRECISION_c)
case ChamConjTrans:
for (j=0; j<N; j++, A++) {
for(i=0; i<M; i++, B++) {
*B = beta * (*B) + alpha * conj(A[LDA*i]);
*B += alpha * conj(A[LDA*i]);
}
B += LDB-M;
}
......@@ -127,7 +137,7 @@ int CORE_zgeadd(cham_trans_t trans, int M, int N,
case ChamTrans:
for (j=0; j<N; j++, A++) {
for(i=0; i<M; i++, B++) {
*B = beta * (*B) + alpha * A[LDA*i];
*B += alpha * A[LDA*i];
}
B += LDB-M;
}
......@@ -137,7 +147,7 @@ int CORE_zgeadd(cham_trans_t trans, int M, int N,
default:
for (j=0; j<N; j++) {
for(i=0; i<M; i++, B++, A++) {
*B = beta * (*B) + alpha * (*A);
*B += alpha * (*A);
}
A += LDA-M;
B += LDB-M;
......
......@@ -78,10 +78,21 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda,
* Tile diagonal
*/
if ( m0 == n0 ) {
for (j = 0; j < n; j++) {
int minmn = chameleon_min( m, n );
/* Lower part */
for (j = 0; j < minmn; j++) {
ran = Rnd64_jump( NBELEM * jump, seed );
for (i = j; i < m; i++) {
*tmp = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
ran = Rnd64_A * ran + Rnd64_C;
#endif
*tmp = creal( *tmp + bump );
tmp++;
for (i = j+1; i < m; i++) {
*tmp = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
......@@ -94,16 +105,21 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda,
jump += bigM + 1;
}
for (j = 0; j < n; j++) {
/* Upper part */
jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM;
for (i = 0; i < minmn; i++) {
ran = Rnd64_jump( NBELEM * (jump+i+1), seed );
for (j = i+1; j < n; j++) {
A[j*lda+i] = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
A[j+j*lda] += bump - I*cimag( A[j+j*lda] );
#else
A[j+j*lda] += bump;
A[j*lda+i] -= I*(0.5f - ran * RndF_Mul);
ran = Rnd64_A * ran + Rnd64_C;
#endif
for (i=0; i<j; i++) {
A[lda*j+i] = conj( A[lda*i+j] );
}
jump += bigM;
}
}
/*
......@@ -148,6 +164,3 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda,
}
}
}
......@@ -63,7 +63,6 @@ Rnd64_jump(unsigned long long int n, unsigned long long int seed ) {
return ran;
}
// CORE_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix.
void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_t *A, int lda,
......@@ -79,10 +78,22 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_
* Tile diagonal
*/
if ( m0 == n0 ) {
for (j = 0; j < n; j++) {
int minmn = chameleon_min( m, n );
/* Lower part */
for (j = 0; j < minmn; j++) {
ran = Rnd64_jump( NBELEM * jump, seed );
for (i = j; i < m; i++) {
*tmp = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
*tmp += I*(0.5f - ran * RndF_Mul);
ran = Rnd64_A * ran + Rnd64_C;
#endif
*tmp = *tmp + bump;
tmp++;
for (i = j+1; i < m; i++) {
*tmp = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
......@@ -95,12 +106,21 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_
jump += bigM + 1;
}
for (j = 0; j < n; j++) {
A[j+j*lda] += bump;
/* Upper part */
jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM;
for (i=0; i<j; i++) {
A[lda*j+i] = A[lda*i+j];
for (i = 0; i < minmn; i++) {
ran = Rnd64_jump( NBELEM * (jump+i+1), seed );
for (j = i+1; j < n; j++) {
A[j*lda+i] = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
A[j*lda+i] += I*(0.5f - ran * RndF_Mul);
ran = Rnd64_A * ran + Rnd64_C;
#endif
}
jump += bigM;
}
}
/*
......@@ -145,6 +165,3 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_
}
}
}
......@@ -20,6 +20,7 @@
*
*/
#include "coreblas.h"
#include "coreblas/lapacke.h"
/**
******************************************************************************
......@@ -95,7 +96,7 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
CHAMELEON_Complex64_t beta,
CHAMELEON_Complex64_t *B, int LDB)
{
int i, j;
int i, j, minMN;
if (uplo == ChamUpperLower){
int rc = CORE_zgeadd( trans, M, N, alpha, A, LDA, beta, B, LDB );
......@@ -137,6 +138,17 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
return -9;
}
minMN = chameleon_min( M, N );
if ( beta == 0. ) {
LAPACKE_zlaset_work( LAPACK_COL_MAJOR, chameleon_lapack_const(uplo),
M, N, 0., 0., B, LDB );
}
else if ( beta != 1. ) {
LAPACKE_zlascl_work( LAPACK_COL_MAJOR, chameleon_lapack_const(uplo),
0, 0, 1., beta, M, N, B, LDB );
}
/**
* ChamLower
*/
......@@ -144,9 +156,9 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
switch( trans ) {
#if defined(PRECISION_z) || defined(PRECISION_c)
case ChamConjTrans:
for (j=0; j<N; j++, A++) {
for (j=0; j<minMN; j++, A++) {
for(i=j; i<M; i++, B++) {
*B = beta * (*B) + alpha * conj(A[LDA*i]);
*B += alpha * conj(A[LDA*i]);
}
B += LDB-M+j+1;
}
......@@ -154,9 +166,9 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
#endif /* defined(PRECISION_z) || defined(PRECISION_c) */
case ChamTrans:
for (j=0; j<N; j++, A++) {
for (j=0; j<minMN; j++, A++) {
for(i=j; i<M; i++, B++) {
*B = beta * (*B) + alpha * A[LDA*i];
*B += alpha * A[LDA*i];
}
B += LDB-M+j+1;
}
......@@ -164,9 +176,9 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
case ChamNoTrans:
default:
for (j=0; j<N; j++) {
for (j=0; j<minMN; j++) {
for(i=j; i<M; i++, B++, A++) {
*B = beta * (*B) + alpha * (*A);
*B += alpha * (*A);
}
B += LDB-M+j+1;
A += LDA-M+j+1;
......@@ -183,7 +195,7 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
for (j=0; j<N; j++, A++) {
int mm = chameleon_min( j+1, M );
for(i=0; i<mm; i++, B++) {
*B = beta * (*B) + alpha * conj(A[LDA*i]);
*B += alpha * conj(A[LDA*i]);
}
B += LDB-mm;
}
......@@ -194,7 +206,7 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
for (j=0; j<N; j++, A++) {
int mm = chameleon_min( j+1, M );
for(i=0; i<mm; i++, B++) {
*B = beta * (*B) + alpha * (A[LDA*i]);
*B += alpha * (A[LDA*i]);
}
B += LDB-mm;
}
......@@ -205,7 +217,7 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
for (j=0; j<N; j++) {
int mm = chameleon_min( j+1, M );
for(i=0; i<mm; i++, B++, A++) {
*B = beta * (*B) + alpha * (*A);
*B += alpha * (*A);
}
B += LDB-mm;
A += LDA-mm;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment