Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/chameleon
  • lvilleve/chameleon-toto
  • jcletort/chameleon
  • thibault/chameleon
  • tcojean/chameleon
  • sylvand/chameleon
  • viroulea/chameleon
  • x-ltac/chameleon
  • agullo/chameleon
  • glucas/chameleon
  • pswartva/chameleon
  • aguermou1/chameleon
  • eyrauddu/chameleon
  • mverite/chameleon
  • alisito/chameleon
  • furmento/chameleon
  • fpruvost/chameleon
  • ahourcau/chameleon
  • bnicolas/chameleon
  • pesterie/chameleon
  • mmarcos/chameleon
21 results
Show changes
Commits on Source (4)
Showing
with 744 additions and 24 deletions
......@@ -14,6 +14,7 @@ _extra_blas = [
('', 'she2ge', 'dhe2ge', 'che2ge', 'zhe2ge' ),
('', 'slatro', 'dlatro', 'clatro', 'zlatro' ), #=> Replace by getmo/gecmo as in essl
('', 'sbuild', 'dbuild', 'cbuild', 'zbuild' ), #=> Replace by map function
('', 'sgram', 'dgram', 'cgram', 'zgram' ),
]
_extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
......
......@@ -242,6 +242,11 @@ set(ZSRC
##################
zbuild.c
pzbuild.c
##################
# Gram
##################
zgram.c
pzgram.c
)
precisions_rules_py(CHAMELEON_SRCS_GENERATED "${ZSRC}"
......
......@@ -35,7 +35,7 @@
* @param[in,out] A
* On exit, the operator has been applied on each tile of the matrix A.
*
* @param[in] operator
* @param[in] op_fct
* The operator function to apply on each tile of the matrix.
*
* @param[in,out] op_args
......@@ -53,7 +53,7 @@
*/
int CHAMELEON_map_Tile( cham_uplo_t uplo,
CHAM_desc_t *A,
cham_unary_operator_t operator,
cham_unary_operator_t op_fct,
void *op_args )
{
CHAM_context_t *chamctxt;
......@@ -68,7 +68,7 @@ int CHAMELEON_map_Tile( cham_uplo_t uplo,
}
chameleon_sequence_create( chamctxt, &sequence );
CHAMELEON_map_Tile_Async( uplo, A, operator, op_args, sequence, &request );
CHAMELEON_map_Tile_Async( uplo, A, op_fct, op_args, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
......@@ -107,7 +107,7 @@ int CHAMELEON_map_Tile( cham_uplo_t uplo,
*/
int CHAMELEON_map_Tile_Async( cham_uplo_t uplo,
CHAM_desc_t *A,
cham_unary_operator_t operator,
cham_unary_operator_t op_fct,
void *op_args,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
......@@ -146,7 +146,7 @@ int CHAMELEON_map_Tile_Async( cham_uplo_t uplo,
return CHAMELEON_SUCCESS;
}
chameleon_pmap( uplo, A, operator, op_args, sequence, request );
chameleon_pmap( uplo, A, op_fct, op_args, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -21,7 +21,7 @@
* chameleon_pmap - Generate a random matrix by tiles.
*/
void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
cham_unary_operator_t operator, void *op_args,
cham_unary_operator_t op_fct, void *op_args,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
......@@ -40,12 +40,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_map(
&options,
ChamUpperLower, A(m, n),
operator, op_args );
op_fct, op_args );
}
INSERT_TASK_map(
&options,
uplo, A(n, n),
operator, op_args );
op_fct, op_args );
}
break;
......@@ -54,12 +54,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_map(
&options,
uplo, A(n, n),
operator, op_args );
op_fct, op_args );
for (m = n+1; m < A->mt; m++) {
INSERT_TASK_map(
&options,
ChamUpperLower, A(m, n),
operator, op_args );
op_fct, op_args );
}
}
break;
......@@ -71,7 +71,7 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_map(
&options,
uplo, A(m, n),
operator, op_args );
op_fct, op_args );
}
}
}
......
/**
*
* @file pzgram.c
*
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgram parallel algorithm
*
* @version 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2019-04-10
* @precisions normal z -> s d c z
*
*/
#include "control/common.h"
#define A( m, n ) A, (m), (n)
#define W( desc, m, n ) (desc), (m), (n)
static inline void
chameleon_pzgram_internal( cham_uplo_t uplo,
CHAM_desc_t *A,
CHAM_desc_t *Wcol,
CHAM_desc_t *Welt,
RUNTIME_option_t *options )
{
int m, n;
int MT = A->mt;
int NT = A->nt;
int M = A->m;
int N = A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* 1) compute (scl,ssq) over columns in each tile
*/
for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
if ( n == m ) {
INSERT_TASK_dsyssq(
options, ChamColumnwise, uplo, tempmm,
A(m, n), ldam, W( Wcol, m, n) );
}
else {
INSERT_TASK_dgessq(
options, ChamColumnwise, tempmm, tempnn,
A(m, n), ldam, W( Wcol, m, n) );
if ( uplo != ChamUpperLower ) {
INSERT_TASK_dgessq(
options, ChamRowwise, tempmm, tempnn,
A(m, n), ldam, W( Wcol, n, m) );
}
}
}
}
for(n = 0; n < NT; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
/**
* 2) reduce columns (scl,ssq) tiles per processus (between lines)
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dplssq(
options, ChamColumnwise, 1, tempnn,
W( Wcol, m, n ),
W( Wcol, m%P, n ) );
}
/**
* 3) reduce columns (scl,ssq) tiles on the first line of tiles
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dplssq(
options, ChamColumnwise, 1, tempnn,
W( Wcol, m, n ),
W( Wcol, 0, n ) );
}
/* 4) reduce (scl,ssq) inside each tile of the first line of tiles for the global sum square */
INSERT_TASK_dplssq(
options, ChamEltwise, 1, tempnn,
W( Wcol, 0, n ),
W( Welt, 0, n ) );
/* 5) deduce the sum square for each column from the pairs (scl,ssq) -> sqrt(sum) = scl*sqrt(ssq) */
INSERT_TASK_dplssq2( options, tempnn, W( Wcol, 0, n ) );
}
/* 6) reduce global sum squares on each processus (between columns) */
for(n = Q; n < NT; n++) {
INSERT_TASK_dplssq( options, ChamEltwise, 1, 1, W( Welt, 0, n), W( Welt, 0, n%Q) );
}
/* 7) reduce global sum squares on the first tile (index 0, 0) */
for(n = 1; n < Q; n++) {
INSERT_TASK_dplssq(
options, ChamEltwise, 1, 1, W( Welt, 0, n), W( Welt, 0, 0) );
}
/* 8) deduce the global sum square from the pair (scl,ssq) -> sqrt(sum) = scl*sqrt(ssq) */
INSERT_TASK_dplssq2( options, 1, W( Welt, 0, 0) );
/* Finally compute Gram matrix coefficients inplace */
for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
INSERT_TASK_zgram(
options,
( m == n ) ? uplo : ChamUpperLower,
A->m, A->n, tempmm, tempnn,
W( Wcol, 0, m ), 2,
W( Wcol, 0, n ), 2,
W( Welt, 0, 0 ),
A( m, n ), ldam );
}
}
}
/**
*
*/
void chameleon_pzgram( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
CHAM_desc_t Wcol;
CHAM_desc_t Welt;
int workmt, worknt;
int m, n, tempmm, tempnn, ldw;
chamctxt = chameleon_context_self();
if ( sequence->status != CHAMELEON_SUCCESS ) {
return;
}
RUNTIME_options_init(&options, chamctxt, sequence, request);
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
RUNTIME_options_ws_alloc( &options, 1, 0 );
chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 2, A->nb, 2*A->nb,
2*workmt, A->n, 0, 0, 2*workmt, A->n, A->p, A->q,
NULL, NULL, NULL );
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 2, 1, 2,
2, worknt, 0, 0, 2, worknt, A->p, A->q,
NULL, NULL, NULL );
/* Initialize Wcol */
for(m = 0; m < Wcol.mt; m++) {
tempmm = m == Wcol.mt-1 ? Wcol.m-m*Wcol.mb : Wcol.mb;
ldw = Wcol.get_blkldd(&Wcol, m);
for(n = 0; n < Wcol.nt; n++) {
tempnn = n == Wcol.nt-1 ? Wcol.n-n*Wcol.nb : Wcol.nb;
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
-1., -1.,
W( &Wcol, m, n ), ldw );
}
}
/* Initialize Welt */
for(m = 0; m < Welt.mt; m++) {
tempmm = m == Welt.mt-1 ? Welt.m-m*Welt.mb : Welt.mb;
ldw = Welt.get_blkldd(&Welt, m);
for(n = 0; n < Welt.nt; n++) {
tempnn = n == Welt.nt-1 ? Welt.n-n*Welt.nb : Welt.nb;
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
-1., -1.,
W( &Welt, m, n ), ldw );
}
}
chameleon_pzgram_internal( uplo, A, &Wcol, &Welt, &options );
CHAMELEON_Desc_Flush( &Wcol, sequence );
CHAMELEON_Desc_Flush( &Welt, sequence );
CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy( &Wcol );
chameleon_desc_destroy( &Welt );
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, chamctxt);
}
/**
*
* @file zgram.c
*
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgram wrappers
*
* @version 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2019-04-10
* @precisions normal z -> s d c z
*
*/
#include "control/common.h"
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t
*
* CHAMELEON_zgram replace a general matrix by the Gram matrix inplace
*
* \f[
* d_{i.}^2 = (1/n) \sum_j d_{ij}^2` and :math:`d_{..}^2 = (1/n^2) \sum_{i,j} d_{ij}^2 \\
* A_{i,j} = -(1/2) (d_{ij}^2 - d_{i.}^2 - d_{.j}^2 + d_{..}^2)
* \f]
*
*******************************************************************************
*
* @param[in] N
* The number of lines and columns of the matrix A. N >= 0. When N = 0,
* the returned value is set to zero.
*
* @param[in] A
* The N-by-N matrix A.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
*
*******************************************************************************
*
* @sa CHAMELEON_zgram_Tile
* @sa CHAMELEON_zgram_Tile_Async
* @sa CHAMELEON_sgram
*
*/
int CHAMELEON_zgram( cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA )
{
int NB;
int status;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
CHAM_desc_t descAl, descAt;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zgram", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
/* Check input arguments */
if (N < 0) {
chameleon_error("CHAMELEON_zgram", "illegal value of N");
return -1;
}
if (LDA < chameleon_max(1, N)) {
chameleon_error("CHAMELEON_zgram", "illegal value of LDA");
return -3;
}
/* Quick return */
if (N == 0)
return CHAMELEON_SUCCESS;
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = chameleon_tune(CHAMELEON_FUNC_DGEMM, N, N, 0);
if (status != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zgram", "chameleon_tune() failed");
return status;
}
/* Set NT */
NB = CHAMELEON_NB;
chameleon_sequence_create( chamctxt, &sequence );
/* Submit the matrix conversion */
chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, uplo,
A, NB, NB, LDA, N, N, N, sequence, &request );
/* Call the tile interface */
CHAMELEON_zgram_Tile_Async( uplo, &descAt, sequence, &request );
/* Submit the matrix conversion back */
chameleon_ztile2lap( chamctxt, &descAl, &descAt,
ChamDescInout, uplo, sequence, &request );
chameleon_sequence_wait( chamctxt, sequence );
/* Cleanup the temporary data */
chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile
*
* CHAMELEON_zgram_Tile - Tile equivalent of CHAMELEON_zgram().
* Operates on matrices stored by tiles.
* All matrices are passed through descriptors.
* All dimensions are taken from the descriptors.
*
*******************************************************************************
*
* @param[in] A
* The N-by-N matrix A.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
*
*******************************************************************************
*
* @sa CHAMELEON_zgram
* @sa CHAMELEON_zgram_Tile_Async
* @sa CHAMELEON_sgram_Tile
*
*/
int CHAMELEON_zgram_Tile( cham_uplo_t uplo, CHAM_desc_t *A )
{
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zgram_Tile", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
chameleon_sequence_create( chamctxt, &sequence );
CHAMELEON_zgram_Tile_Async( uplo, A, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
chameleon_sequence_wait( chamctxt, sequence );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile_Async
*
* CHAMELEON_zgram_Tile_Async - Non-blocking equivalent of CHAMELEON_zgram_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa CHAMELEON_zgram
* @sa CHAMELEON_zgram_Tile
* @sa CHAMELEON_sgram_Tile_Async
*
*/
int CHAMELEON_zgram_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zgram_Tile", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
if (sequence == NULL) {
chameleon_fatal_error("CHAMELEON_zgram_Tile", "NULL sequence");
return CHAMELEON_ERR_UNALLOCATED;
}
if (request == NULL) {
chameleon_fatal_error("CHAMELEON_zgram_Tile", "NULL request");
return CHAMELEON_ERR_UNALLOCATED;
}
/* Check sequence status */
if (sequence->status == CHAMELEON_SUCCESS) {
request->status = CHAMELEON_SUCCESS;
}
else {
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
}
/* Check descriptors for correctness */
if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zgram_Tile", "invalid descriptor");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (A->nb != A->mb) {
chameleon_error("CHAMELEON_zgram_Tile", "only square tiles supported");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
/* Quick return */
if (chameleon_min(A->m, A->n) == 0) {
return CHAMELEON_SUCCESS;
}
chameleon_pzgram( uplo, A, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -115,7 +115,10 @@ void chameleon_pzungqr_param( int genD, int K, const libhqr_tree_t *qrtree,
CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
/**
* Gram function prototypes
*/
void chameleon_pzgram( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
/**
* LAPACK/Tile Descriptor accesses
......
......@@ -44,6 +44,7 @@ set(ZSRC
core_zgetrf.c
core_zgetrf_incpiv.c
core_zgetrf_nopiv.c
core_zgram.c
core_zhe2ge.c
core_zherfb.c
core_zhemm.c
......
......@@ -174,6 +174,24 @@ int CORE_zgessq( cham_store_t storev, int M, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i;
int K;
/* initialize pairs scale, sumsquare if not already done */
if ( storev == ChamColumnwise ) {
K = N;
} else if ( storev == ChamRowwise ) {
K = M;
} else {
K = 1;
}
for (i=0; i<2*K; i+=2) {
if ( ( sclssq[i] == -1. ) && ( sclssq[i+1] == -1. ) ) {
sclssq[i] = 1.;
sclssq[i+1] = 0.;
}
}
if (storev == ChamColumnwise) {
CORE_zgessq_col( M, N, A, LDA, sclssq );
}
......
/**
*
* @file core_zgram.c
*
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon core_zgram CPU kernel
*
* @version 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2019-04-10
* @precisions normal z -> s d c z
*
*/
#include "coreblas.h"
/**
*******************************************************************************
*
* @ingroup CORE_CHAMELEON_Complex64_t
*
* CORE_zgram computes the Gram matrix factors of A inplace
*
* \f[
* d_{i.}^2 = (1/n) \sum_j d_{ij}^2` and :math:`d_{..}^2 = (1/n^2) \sum_{i,j} d_{ij}^2 \\
* A_{i,j} = -(1/2) (d_{ij}^2 - d_{i.}^2 - d_{.j}^2 + d_{..}^2)
* \f]
*
*******************************************************************************
*
* @param[in] uplo
* Specifies the part of the matrix A to be computed.
* = ChamUpperLower: All the matrix A
* = ChamUpper: Upper triangular part
* = ChamLower: Lower triangular part
*
* @param[in] M
* The number of rows of the overall matrix. M >= 0.
*
* @param[in] N
* The number of columns of the overall matrix. N >= 0.
*
* @param[in] Mt
* The number of rows of the tile A. Mt >= 0.
*
* @param[in] Nt
* The number of columns of the tile A. Nt >= 0.
*
* @param[in] Di
* The 1-by-Mt tile containing the sum of squares by rows.
*
* @param[in] LDDI
* The leading dimension of the array Di.
*
* @param[in] Dj
* The 1-by-Nt tile containing the sum of squares by columns.
*
* @param[in] LDDJ
* The leading dimension of the array Dj.
*
* @param[in] D
* The sum of squares of all the matrix.
*
* @param[in,out] A
* On entry, the M-by-N tile A.
* On exit, updated by the application of L.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval <0 if INFO = -k, the k-th argument had an illegal value
*
*/
int CORE_zgram( cham_uplo_t uplo,
int M, int N,
int Mt, int Nt,
const double *Di, int LDDI,
const double *Dj, int LDDJ,
const double *D,
double *A, int LDA )
{
int i, j;
double coef = -0.5;
double di, dj, d;
/* Check input arguments */
if (uplo != ChamUpper && uplo != ChamLower && uplo != ChamUpperLower ) {
coreblas_error(1, "Illegal value of uplo");
return -1;
}
if (M < 0) {
coreblas_error(2, "Illegal value of M");
return -2;
}
if (N < 0) {
coreblas_error(3, "Illegal value of N");
return -3;
}
if (Mt < 0) {
coreblas_error(4, "Illegal value of Mt");
return -4;
}
if (Nt < 0) {
coreblas_error(5, "Illegal value of Nt");
return -5;
}
if ((LDA < chameleon_max(1,Mt)) && (Mt > 0)) {
coreblas_error(12, "Illegal value of LDA");
return -12;
}
/* Quick return */
if ((Mt == 0) || (Nt == 0))
return CHAMELEON_SUCCESS;
/* overall mean of squares */
d = D[0]*D[0]/(M*N);
for(j = 0; j < Nt; j++) {
/* mean of squares of the column */
dj = Dj[j*LDDJ]*Dj[j*LDDJ]/M;
int mmin = ( uplo == ChamLower ) ? j : 0;
int mmax = ( uplo == ChamUpper) ? chameleon_min(j+1, Mt) : Mt;
for(i = mmin; i < mmax; i++) {
/* mean of squares of the row */
di = Di[i*LDDI]*Di[i*LDDI]/N;
/* compute Gram factor */
A[j*LDA+i] = coef*(A[j*LDA+i]*A[j*LDA+i] - di - dj + d);
}
}
return CHAMELEON_SUCCESS;
}
......@@ -41,7 +41,7 @@ CORE_zplssq_col( int M, int N,
for(j=0; j<N; j++) {
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
sumsq_update_2( tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
}
......@@ -68,7 +68,7 @@ CORE_zplssq_row( int M, int N,
tmpScaleOut = sclssqout;
tmpSumsqOut = sclssqout+1;
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
sumsq_update_2( tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
tmpScaleOut+=2;
......@@ -95,7 +95,7 @@ CORE_zplssq_elt( int M, int N,
for(j=0; j<N; j++) {
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
sumsq_update_2( tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
}
......@@ -147,6 +147,24 @@ int CORE_zplssq( cham_store_t storev, int M, int N,
double *sclssqin,
double *sclssqout )
{
int i;
int K;
/* initialize pairs scale, sumsquare if not already done */
if ( storev == ChamColumnwise ) {
K = N;
} else if ( storev == ChamRowwise ) {
K = M;
} else {
K = 1;
}
for (i=0; i<2*K; i+=2) {
if ( ( sclssqout[i] == -1. ) && ( sclssqout[i+1] == -1. ) ) {
sclssqout[i] = 1.;
sclssqout[i+1] = 0.;
}
}
if (storev == ChamColumnwise) {
CORE_zplssq_col( M, N, sclssqin, sclssqout );
}
......@@ -173,7 +191,7 @@ int CORE_zplssq( cham_store_t storev, int M, int N,
* @param[in,out] sclssq
* The 2*N matrix on which to compute scl*sqrt(ssq)
* On entry contains all couple (scl,ssq) in (sclssq[i],sclssq[i+1])
* On exist returns sclssq[i]=sclssq[i]*sqrt(sclssq[i+1])
* On exist returns scl*sqrt(ssq) stored in sclssq[2*i], i = 0, ..., N-1
* so that the result is stored in the first line.
*
*******************************************************************************
......@@ -186,8 +204,8 @@ int CORE_zplssq2( int N,
double *sclssq )
{
int i;
for (i=0; i<N; i+=2) {
sclssq[i] *= sqrt ( sclssq[i+1] );
for (i=0; i<2*N; i+=2) {
sclssq[i] = sclssq[i]*sqrt(sclssq[i+1]);
}
return CHAMELEON_SUCCESS;
}
......@@ -244,6 +244,20 @@ int CORE_zsyssq( cham_store_t storev, cham_uplo_t uplo, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i;
int K = N;
/* Initialize pairs scale, sumsquare if not already done */
if ( storev == ChamEltwise ) {
K = 1;
}
for (i=0; i<2*K; i+=2) {
if ( ( sclssq[i] == -1. ) && ( sclssq[i+1] == -1. ) ) {
sclssq[i] = 1.;
sclssq[i+1] = 0.;
}
}
if ( uplo == ChamUpper ) {
if ( storev == ChamEltwise ) {
CORE_zsyssq_up_elt( N, A, LDA, sclssq );
......
......@@ -360,4 +360,13 @@ int CORE_zunmqr(cham_side_t side, cham_trans_t trans,
CHAMELEON_Complex64_t *C, int LDC,
CHAMELEON_Complex64_t *WORK, int LDWORK);
/**
* Gram prototypes
*/
int CORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt,
const double *Di, int LDDI,
const double *Dj, int LDDJ,
const double *D,
double *A, int LDA );
#endif /* _coreblas_z_h_ */
......@@ -79,7 +79,7 @@ sumsq_update( int nb, float *scale, float *sumsq, const float *value )
*******************************************************************************
*
* @brief Update the couple (scale, sumsq) by adding another couple when
* computing the Froebnius norm.
* computing the Froebenius norm.
*
* The frobenius norm is equal to scale * sqrt( sumsq ), this method allows to
* avoid overflow in the sum square computation.
......@@ -100,7 +100,7 @@ sumsq_update( int nb, float *scale, float *sumsq, const float *value )
*******************************************************************************/
static inline void
#if defined(PRECISION_d) || defined(PRECISION_z)
sumsq_update_2( int nb, const double *scalein, const double *sumsqin, double *scaleout, double *sumsqout )
sumsq_update_2( const double *scalein, const double *sumsqin, double *scaleout, double *sumsqout )
{
if (*scaleout >= 0.) {
if ( (*scaleout) < (*scalein) ) {
......@@ -114,7 +114,7 @@ sumsq_update_2( int nb, const double *scalein, const double *sumsqin, double *sc
}
}
#elif defined(PRECISION_s) || defined(PRECISION_c)
sumsq_update_2( int nb, const float *scalein, const float *sumsqin, float *scaleout, float *sumsqout )
sumsq_update_2( const float *scalein, const float *sumsqin, float *scaleout, float *sumsqout )
{
if (*scaleout >= 0.) {
if ( (*scaleout) < (*scalein) ) {
......
......@@ -331,6 +331,13 @@ int CHAMELEON_zbuild(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t *A, i
int CHAMELEON_zbuild_Tile(cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback );
int CHAMELEON_zbuild_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
/**
* Gram function prototypes
*/
int CHAMELEON_zgram( cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA );
int CHAMELEON_zgram_Tile( cham_uplo_t uplo, CHAM_desc_t *A );
int CHAMELEON_zgram_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
END_C_DECLS
#endif /* _chameleon_z_h_ */
......@@ -418,4 +418,15 @@ INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
}
/**
* Gram prototype
*/
void INSERT_TASK_zgram( const RUNTIME_option_t *options,
cham_uplo_t uplo,
int m, int n, int mt, int nt,
const CHAM_desc_t *Di, int Dim, int Din, int lddi,
const CHAM_desc_t *Dj, int Djm, int Djn, int lddj,
const CHAM_desc_t *D, int Dm, int Dn,
CHAM_desc_t *A, int Am, int An, int lda);
#endif /* _chameleon_tasks_z_h_ */
......@@ -94,6 +94,10 @@ set(CODELETS_ZSRC
# BUILD
##################
codelets/codelet_zbuild.c
##################
# GRAM
##################
codelets/codelet_zgram.c
)
set(CODELETS_SRC
......
......@@ -18,13 +18,13 @@
void INSERT_TASK_map( const RUNTIME_option_t *options,
cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
cham_unary_operator_t operator, void *op_args )
cham_unary_operator_t op_fct, void *op_args )
{
char *ptrA = RTBLKADDR( A, char, Am, An );
#pragma omp task depend(inout: ptrA[0])
{
operator( A, uplo, Am, An, ptrA, op_args );
op_fct( A, uplo, Am, An, ptrA, op_args );
}
}
/**
*
* @file openmp/codelet_zgram.c
*
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgram OpenMP codelet
*
* @version 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2019-04-10
* @precisions normal z -> s d c z
*
*/
#include "chameleon_openmp.h"
#include "chameleon/tasks_z.h"
void INSERT_TASK_zgram( const RUNTIME_option_t *options,
cham_uplo_t uplo,
int m, int n, int mt, int nt,
const CHAM_desc_t *Di, int Dim, int Din, int lddi,
const CHAM_desc_t *Dj, int Djm, int Djn, int lddj,
const CHAM_desc_t *D, int Dm, int Dn,
CHAM_desc_t *A, int Am, int An, int lda)
{
double *ptrDi = RTBLKADDR(Di, double, Dim, Din);
double *ptrDj = RTBLKADDR(Dj, double, Djm, Djn);
double *ptrD = RTBLKADDR(D, double, Dm, Dn);
double *ptrA = RTBLKADDR(A, double, Am, An);
#pragma omp task firstprivate(uplo, m, n, mt, nt, ptrDi, lddi, ptrDj, lddj, ptrD, ptrA, lda) depend(in:ptrDi[0], ptrDj[0], ptrD[0]) depend(inout:ptrA[0])
CORE_zgram( uplo,
m, n, mt, nt,
ptrDi, lddi,
ptrDj, lddj,
ptrD,
ptrA, lda);
}
......@@ -39,7 +39,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, int N,
const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
{
CHAMELEON_Complex64_t *res = RTBLKADDR(RESULT, double, RESULTm, RESULTn);
double *res = RTBLKADDR(RESULT, double, RESULTm, RESULTn);
#pragma omp task firstprivate(N) depend(inout: res[0])
CORE_zplssq2(N, res);
......