Commit 6b6a8255 authored by Mathieu Faverge's avatar Mathieu Faverge

Merge branch 'feature/genericssq' into 'master'

Feature/genericssq

See merge request solverstack/chameleon!148
parents 3ca82fe8 2d0a51cb
......@@ -329,13 +329,14 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
else {
INSERT_TASK_zgessq(
options,
ChamEltwise,
tempmm, tempnn,
A(m, n), ldam, W( Welt, m, n) );
}
if ( n >= Q ) {
INSERT_TASK_dplssq(
options, W( Welt, m, n), W( Welt, m, n%Q) );
options, ChamEltwise, 1, 1, W( Welt, m, n), W( Welt, m, n%Q) );
}
}
......@@ -345,7 +346,7 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
*/
for(n = 1; n < Q; n++) {
INSERT_TASK_dplssq(
options, W( Welt, m, n), W( Welt, m, 0) );
options, ChamEltwise, 1, 1, W( Welt, m, n), W( Welt, m, 0) );
}
}
......@@ -355,7 +356,7 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dplssq(
options, W( Welt, m, 0), W( Welt, m%P, 0) );
options, ChamEltwise, 1, 1, W( Welt, m, 0), W( Welt, m%P, 0) );
}
/**
......@@ -364,11 +365,11 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dplssq(
options, W( Welt, m, 0), W( Welt, 0, 0) );
options, ChamEltwise, 1, 1, W( Welt, m, 0), W( Welt, 0, 0) );
}
INSERT_TASK_dplssq2(
options, W( Welt, 0, 0) );
options, 1, W( Welt, 0, 0) );
}
/**
......
......@@ -241,21 +241,21 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
if ( n == m ) {
if ( trans == ChamConjTrans) {
INSERT_TASK_zhessq(
options, uplo, tempmm,
options, ChamEltwise, uplo, tempmm,
A(m, n), ldam, W( Welt, m, n) );
}
else {
INSERT_TASK_zsyssq(
options, uplo, tempmm,
options, ChamEltwise, uplo, tempmm,
A(m, n), ldam, W( Welt, m, n) );
}
}
else {
INSERT_TASK_zgessq(
options, tempmm, tempnn,
options, ChamEltwise, tempmm, tempnn,
A(m, n), ldam, W( Welt, m, n) );
INSERT_TASK_zgessq(
options, tempmm, tempnn,
options, ChamEltwise, tempmm, tempnn,
A(m, n), ldam, W( Welt, n, m) );
}
}
......@@ -264,7 +264,7 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
for(m = 0; m < MT; m++) {
for(n = Q; n < NT; n++) {
INSERT_TASK_dplssq(
options, W( Welt, m, n), W( Welt, m, n%Q) );
options, ChamEltwise, 1, 1, W( Welt, m, n), W( Welt, m, n%Q) );
}
/**
......@@ -273,7 +273,7 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
*/
for(n = 1; n < Q; n++) {
INSERT_TASK_dplssq(
options, W( Welt, m, n), W( Welt, m, 0) );
options, ChamEltwise, 1, 1, W( Welt, m, n), W( Welt, m, 0) );
}
}
......@@ -283,7 +283,7 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dplssq(
options, W( Welt, m, 0), W( Welt, m%P, 0) );
options, ChamEltwise, 1, 1, W( Welt, m, 0), W( Welt, m%P, 0) );
}
/**
......@@ -292,11 +292,11 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dplssq(
options, W( Welt, m, 0), W( Welt, 0, 0) );
options, ChamEltwise, 1, 1, W( Welt, m, 0), W( Welt, 0, 0) );
}
INSERT_TASK_dplssq2(
options, W( Welt, 0, 0) );
options, 1, W( Welt, 0, 0) );
}
/**
......
......@@ -66,6 +66,7 @@ set(ZSRC
core_zplghe.c
core_zplgsy.c
core_zplrnt.c
core_zplssq.c
core_zpotrf.c
core_zssssm.c
core_zsymm.c
......
......@@ -15,23 +15,98 @@
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2014-11-16
* @precisions normal z -> c d s
*
*/
#include <math.h>
#include "coreblas/lapacke.h"
#include "coreblas/sumsq_update.h"
#include "coreblas.h"
#define UPDATE( __nb, __value ) \
if (__value != 0. ){ \
if ( *scale < __value ) { \
*sumsq = __nb + (*sumsq) * ( *scale / __value ) * ( *scale / __value ); \
*scale = __value; \
} else { \
*sumsq = *sumsq + __nb * ( __value / *scale ) * ( __value / *scale ); \
} \
/**
* @brief Subcase storev == ChamColumnwise of CORE_zgessq()
*/
static inline int
CORE_zgessq_col( int M, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i, j;
double tmp;
double *ptr, *tmpScale, *tmpSumsq;
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );
tmpScale = sclssq+2*j;
tmpSumsq = sclssq+2*j+1;
for(i=0; i<M; i++, ptr++) {
tmp = fabs(*ptr);
sumsq_update( 1., tmpScale, tmpSumsq, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
sumsq_update( 1., tmpScale, tmpSumsq, &tmp );
#endif
}
}
return CHAMELEON_SUCCESS;
}
/**
* @brief Subcase storev == ChamRowwise of CORE_zgessq()
*/
static inline int
CORE_zgessq_row( int M, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i, j;
double tmp;
double *ptr, *tmpScale, *tmpSumsq;
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );
tmpScale = sclssq;
tmpSumsq = sclssq+1;
for(i=0; i<M; i++, ptr++, tmpScale+=2, tmpSumsq+=2) {
tmp = fabs(*ptr);
sumsq_update( 1., tmpScale, tmpSumsq, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
sumsq_update( 1., tmpScale, tmpSumsq, &tmp );
#endif
}
}
return CHAMELEON_SUCCESS;
}
/**
* @brief Subcase storev == ChamEltwise of CORE_zgessq()
*/
static inline int
CORE_zgessq_elt( int M, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i, j;
double tmp;
double *ptr;
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );
for(i=0; i<M; i++, ptr++) {
tmp = fabs(*ptr);
sumsq_update( 1., sclssq, sclssq+1, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
sumsq_update( 1., sclssq, sclssq+1, &tmp );
#endif
}
}
return CHAMELEON_SUCCESS;
}
/**
*
......@@ -60,6 +135,12 @@
*
*******************************************************************************
*
* @param[in] storev
* Specifies whether the sums are made per column or row.
* = ChamColumnwise: Computes the sum of squares on each column
* = ChamRowwise: Computes the sum of squares on each row
* = ChamEltwise: Computes the sum of squares on all the matrix
*
* @param[in] M
* The number of rows in the tile A.
*
......@@ -72,13 +153,16 @@
* @param[in] LDA
* The leading dimension of the tile A. LDA >= max(1,M).
*
* @param[in,out] scale
* On entry, the value scale in the equation above.
* On exit, scale is overwritten with the value scl.
*
* @param[in,out] sumsq
* On entry, the value sumsq in the equation above.
* On exit, SUMSQ is overwritten with the value ssq.
* @param[in,out] sclssq
* Dimension: (2,K)
* if storev == ChamColumnwise, K = N
* if storev == ChamRowwise, K = M
* if storev == ChamEltwise, K = 1
* On entry, sclssq contains K couples (sclssq[2*i], sclssq[2*i+1])
* which corresponds to (scale, sumsq) in the equation below
* ( scl**2 )*ssq = sum( A( i, j )**2 ) + ( scale**2 )*sumsq,
* respectively for the columns, the rows and the full matrix
* On exit, each couple is overwritten with the final result (scl, ssq).
*
*******************************************************************************
*
......@@ -86,26 +170,18 @@
* @retval -k, the k-th argument had an illegal value
*
*/
int CORE_zgessq(int M, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *scale, double *sumsq)
int CORE_zgessq( cham_store_t storev, int M, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i, j;
double tmp;
double *ptr;
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );
for(i=0; i<M; i++, ptr++) {
tmp = fabs(*ptr);
UPDATE( 1., tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
UPDATE( 1., tmp );
#endif
}
if (storev == ChamColumnwise) {
CORE_zgessq_col( M, N, A, LDA, sclssq );
}
else if (storev == ChamRowwise) {
CORE_zgessq_row( M, N, A, LDA, sclssq );
}
else {
CORE_zgessq_elt( M, N, A, LDA, sclssq );
}
return CHAMELEON_SUCCESS;
}
......@@ -19,20 +19,8 @@
* @precisions normal z -> c
*
*/
#include <math.h>
#include "coreblas/lapacke.h"
#include "coreblas.h"
#define UPDATE( __nb, __value ) \
if (__value != 0. ){ \
if ( *scale < __value ) { \
*sumsq = __nb + (*sumsq) * ( *scale / __value ) * ( *scale / __value ); \
*scale = __value; \
} else { \
*sumsq = *sumsq + __nb * ( __value / *scale ) * ( __value / *scale ); \
} \
}
/**
*
* @ingroup CORE_CHAMELEON_Complex64_t
......@@ -56,18 +44,24 @@
* SCALE and SUMSQ are overwritten by scl and ssq respectively.
*
* The routine makes only one pass through the tile triangular part of the
* hermitian tile A defined by uplo.
* symmetric tile A defined by uplo.
* See also LAPACK zlassq.f
*
*******************************************************************************
*
* @param[in] storev
* Specifies whether the sums are made per column or row.
* = ChamColumnwise: Computes the sum of squares on each column
* = ChamRowwise: Computes the sum of squares on each row
* = ChamEltwise: Computes the sum of squares on all the matrix
*
* @param[in] uplo
* Specifies whether the upper or lower triangular part of
* the hermitian matrix A is to be referenced as follows:
* the symmetric matrix A is to be referenced as follows:
* = ChamLower: Only the lower triangular part of the
* hermitian matrix A is to be referenced.
* symmetric matrix A is to be referenced.
* = ChamUpper: Only the upper triangular part of the
* hermitian matrix A is to be referenced.
* symmetric matrix A is to be referenced.
*
* @param[in] N
* The number of columns and rows in the tile A.
......@@ -78,13 +72,16 @@
* @param[in] LDA
* The leading dimension of the tile A. LDA >= max(1,N).
*
* @param[in,out] scale
* On entry, the value scale in the equation above.
* On exit, scale is overwritten with the value scl.
*
* @param[in,out] sumsq
* On entry, the value sumsq in the equation above.
* On exit, SUMSQ is overwritten with the value ssq.
* @param[in,out] sclssq
* Dimension: (2,K)
* if storev == ChamColumnwise, K = N
* if storev == ChamRowwise, K = N
* if storev == ChamEltwise, K = 1
* On entry, sclssq contains K couples (sclssq[2*i], sclssq[2*i+1])
* which corresponds to (scale, sumsq) in the equation below
* ( scl**2 )*ssq = sum( A( i, j )**2 ) + ( scale**2 )*sumsq,
* respectively for the columns, the rows and the full matrix
* On exit, each couple is overwritten with the final result (scl, ssq).
*
*******************************************************************************
*
......@@ -92,65 +89,9 @@
* @retval -k, the k-th argument had an illegal value
*
*/
int CORE_zhessq(cham_uplo_t uplo, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *scale, double *sumsq)
int CORE_zhessq( cham_store_t storev, cham_uplo_t uplo, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i, j;
double tmp;
double *ptr;
if ( uplo == ChamUpper ) {
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );
for(i=0; i<j; i++, ptr++) {
tmp = fabs(*ptr);
UPDATE( 2., tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
UPDATE( 2., tmp );
#endif
}
/* Diagonal */
tmp = fabs(*ptr);
UPDATE( 1., tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
#endif
}
} else {
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA + j);
/* Diagonal */
tmp = fabs(*ptr);
UPDATE( 1., tmp );
ptr++;
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
#endif
for(i=j+1; i<N; i++, ptr++) {
tmp = fabs(*ptr);
UPDATE( 2., tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
UPDATE( 2., tmp );
#endif
}
}
}
return CHAMELEON_SUCCESS;
return CORE_zsyssq( storev, uplo, N, A, LDA, sclssq );
}
/**
*
* @file core_zplssq.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon core_zplssq CPU kernel
*
* @version 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2019-04-01
* @precisions normal z -> c d s
*
*/
#include <math.h>
#include "coreblas/lapacke.h"
#include "coreblas/sumsq_update.h"
#include "coreblas.h"
/**
* @brief Subcase storev == ChamColumnwise of CORE_zplssq()
*/
static inline int
CORE_zplssq_col( int M, int N,
double *sclssqin,
double *sclssqout )
{
int i, j;
double *tmpScaleIn, *tmpSumsqIn, *tmpScaleOut, *tmpSumsqOut;
tmpScaleIn = sclssqin;
tmpSumsqIn = sclssqin+1;
tmpScaleOut = sclssqout;
tmpSumsqOut = sclssqout+1;
for(j=0; j<N; j++) {
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
}
tmpScaleOut+=2;
tmpSumsqOut+=2;
}
return CHAMELEON_SUCCESS;
}
/**
* @brief Subcase storev == ChamRowwise of CORE_zplssq()
*/
static inline int
CORE_zplssq_row( int M, int N,
double *sclssqin,
double *sclssqout )
{
int i, j;
double *tmpScaleIn, *tmpSumsqIn, *tmpScaleOut, *tmpSumsqOut;
tmpScaleIn = sclssqin;
tmpSumsqIn = sclssqin+1;
for(j=0; j<N; j++) {
tmpScaleOut = sclssqout;
tmpSumsqOut = sclssqout+1;
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
tmpScaleOut+=2;
tmpSumsqOut+=2;
}
}
return CHAMELEON_SUCCESS;
}
/**
* @brief Subcase storev == ChamEltwise of CORE_zplssq()
*/
static inline int
CORE_zplssq_elt( int M, int N,
double *sclssqin,
double *sclssqout )
{
int i, j;
double *tmpScaleIn, *tmpSumsqIn, *tmpScaleOut, *tmpSumsqOut;
tmpScaleIn = sclssqin;
tmpSumsqIn = sclssqin+1;
tmpScaleOut = sclssqout;
tmpSumsqOut = sclssqout+1;
for(j=0; j<N; j++) {
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
}
}
return CHAMELEON_SUCCESS;
}
/**
*
* @ingroup CORE_CHAMELEON_Complex64_t
*
* CORE_zplssq adds the values ssq ensuring scl >= 0
*
*******************************************************************************
*
* @param[in] storev
* Specifies whether the sums are made per column or row.
* = ChamColumnwise: Computes the sum of squares on each column
* = ChamRowwise: Computes the sum of squares on each row
* = ChamEltwise: Computes the sum of squares on all the matrix
*
* @param[in] M
* The number of rows in the tile sclssqin.
*
* @param[in] N
* The number of columns in the tile sclssqin.
*
* @param[in] sclssqin
* The 2*M-by-2*N matrix on which to compute the sums.
*
* @param[in,out] sclssqout
* Dimension: (2,K)
* if storev == ChamColumnwise, K = N
* if storev == ChamRowwise, K = M
* if storev == ChamEltwise, K = 1
* On entry, sclssqout contains M-by-N couples (sclssqout[2*i], sclssqout[2*i+1])
* which corresponds to (scale, sumsq) in the equation below
* ( scl**2 )*ssq = sum( A( i, j )**2 ) + ( scale**2 )*sumsq,
* respectively for the columns, the rows and the full matrix
* On exit, each couple is overwritten with the final result (scl, ssq).
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval -k, the k-th argument had an illegal value
*
*/
int CORE_zplssq( cham_store_t storev, int M, int N,
double *sclssqin,
double *sclssqout )
{
if (storev == ChamColumnwise) {
CORE_zplssq_col( M, N, sclssqin, sclssqout );
}
else if (storev == ChamRowwise) {
CORE_zplssq_row( M, N, sclssqin, sclssqout );
}
else {
CORE_zplssq_elt( M, N, sclssqin, sclssqout );
}
return CHAMELEON_SUCCESS;
}
/**
*
* @ingroup CORE_CHAMELEON_Complex64_t
*
* CORE_zplssq2 computes scl*sqrt(ssq) for each couple in a vector
*
*******************************************************************************
*
* @param[in] N
* The number of columns in the tile sclssq.
*
* @param[in,out] sclssq
* The 2*N matrix on which to compute scl*sqrt(ssq)
* On entry contains all couple (scl,ssq) in (sclssq[i],sclssq[i+1])
* On exist returns sclssq[i]=sclssq[i]*sqrt(sclssq[i+1])
* so that the result is stored in the first line.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval -k, the k-th argument had an illegal value
*
*/
int CORE_zplssq2( int N,
double *sclssq )
{
int i;
for (i=0; i<N; i+=2) {
sclssq[i] *= sqrt ( sclssq[i+1] );
}
return CHAMELEON_SUCCESS;
}
......@@ -21,17 +21,155 @@
*/
#include <math.h>
#include "coreblas/lapacke.h"
#include "coreblas/sumsq_update.h"
#include "coreblas.h"
#define UPDATE( __nb, __value ) \
if (__value != 0. ){ \
if ( *scale < __value ) { \
*sumsq = __nb + (*sumsq) * ( *scale / __value ) * ( *scale / __value ); \
*scale = __value; \
} else { \
*sumsq = *sumsq + __nb * ( __value / *scale ) * ( __value / *scale ); \
} \
/**
* @brief Subcase uplo == ChamUpper, storev == ChamColumnwise || ChamRowwise of CORE_zsyssq()
*/
static inline int
CORE_zsyssq_up_col( int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i, j;
double tmp;
double *ptr;
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );
for(i=0; i<j; i++, ptr++) {
tmp = fabs(*ptr);
sumsq_update( 1., sclssq+2*i, sclssq+2*i+1, &tmp );
sumsq_update( 1., sclssq+2*j, sclssq+2*j+1, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
sumsq_update( 1., sclssq+2*i, sclssq+2*i+1, &tmp );
sumsq_update( 1., sclssq+2*j, sclssq+2*j+1, &tmp );
#endif
}
tmp = fabs(*ptr);
sumsq_update( 1., sclssq+2*j, sclssq+2*j+1, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
sumsq_update( 1., sclssq+2*j, sclssq+2*j+1, &tmp );
#endif
}
return CHAMELEON_SUCCESS;
}
/**
* @brief Subcase uplo == ChamLower, storev == ChamColumnwise || ChamRowwise of CORE_zsyssq()
*/
static inline int
CORE_zsyssq_lo_col( int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i, j;
double tmp;
double *ptr;
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * (LDA + 1) );
tmp = fabs(*ptr);
sumsq_update( 1., sclssq+2*j, sclssq+2*j+1, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
sumsq_update( 1., sclssq+2*j, sclssq+2*j+1, &tmp );
#endif
ptr++;
for(i=j+1; i<N; i++, ptr++) {
tmp = fabs(*ptr);
sumsq_update( 1., sclssq+2*i, sclssq+2*i+1, &tmp );
sumsq_update( 1., sclssq+2*j, sclssq+2*j+1, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
sumsq_update( 1., sclssq+2*i, sclssq+2*i+1, &tmp );
sumsq_update( 1., sclssq+2*j, sclssq+2*j+1, &tmp );
#endif
}
}
return CHAMELEON_SUCCESS;