Commit 9691baac authored by PRUVOST Florent's avatar PRUVOST Florent Committed by Mathieu Faverge

update ge/he/syssq algos to make them more generic and get the possibility to...

update ge/he/syssq algos to make them more generic and get the possibility to accumulate squares over columns or rows
parent e0ec2468
......@@ -14,6 +14,7 @@ _extra_blas = [
('', 'she2ge', 'dhe2ge', 'che2ge', 'zhe2ge' ),
('', 'slatro', 'dlatro', 'clatro', 'zlatro' ), #=> Replace by getmo/gecmo as in essl
('', 'sbuild', 'dbuild', 'cbuild', 'zbuild' ), #=> Replace by map function
('', 'sgram', 'dgram', 'cgram', 'zgram' ),
]
_extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
......
......@@ -242,6 +242,11 @@ set(ZSRC
##################
zbuild.c
pzbuild.c
##################
# Gram
##################
zgram.c
pzgram.c
)
precisions_rules_py(CHAMELEON_SRCS_GENERATED "${ZSRC}"
......
/**
*
* @file pzgram.c
*
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgram parallel algorithm
*
* @version 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2019-04-10
* @precisions normal z -> s d c z
*
*/
#include "control/common.h"
#define A( m, n ) A, (m), (n)
#define W( desc, m, n ) (desc), (m), (n)
static inline void
chameleon_pzgram_internal( cham_uplo_t uplo,
CHAM_desc_t *A,
CHAM_desc_t *Wcol,
CHAM_desc_t *Welt,
RUNTIME_option_t *options )
{
int m, n;
int MT = A->mt;
int NT = A->nt;
int M = A->m;
int N = A->n;
int P = Welt->p;
int Q = Welt->q;
/**
* 1) compute (scl,ssq) over columns in each tile
*/
for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
if ( n == m ) {
INSERT_TASK_dsyssq(
options, ChamColumnwise, uplo, tempmm,
A(m, n), ldam, W( Wcol, m, n) );
}
else {
INSERT_TASK_dgessq(
options, ChamColumnwise, tempmm, tempnn,
A(m, n), ldam, W( Wcol, m, n) );
if ( uplo != ChamUpperLower ) {
INSERT_TASK_dgessq(
options, ChamRowwise, tempmm, tempnn,
A(m, n), ldam, W( Wcol, n, m) );
}
}
}
}
for(n = 0; n < NT; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
/**
* 2) reduce columns (scl,ssq) tiles per processus (between lines)
*/
for(m = P; m < MT; m++) {
INSERT_TASK_dplssq(
options, ChamColumnwise, 1, tempnn,
W( Wcol, m, n ),
W( Wcol, m%P, n ) );
}
/**
* 3) reduce columns (scl,ssq) tiles on the first line of tiles
*/
for(m = 1; m < P; m++) {
INSERT_TASK_dplssq(
options, ChamColumnwise, 1, tempnn,
W( Wcol, m, n ),
W( Wcol, 0, n ) );
}
/* 4) reduce (scl,ssq) inside each tile of the first line of tiles for the global sum square */
INSERT_TASK_dplssq(
options, ChamEltwise, 1, tempnn,
W( Wcol, 0, n ),
W( Welt, 0, n ) );
/* 5) deduce the sum square for each column from the pairs (scl,ssq) -> sqrt(sum) = scl*sqrt(ssq) */
INSERT_TASK_dplssq2( options, tempnn, W( Wcol, 0, n ) );
}
/* 6) reduce global sum squares on each processus (between columns) */
for(n = Q; n < NT; n++) {
INSERT_TASK_dplssq( options, ChamEltwise, 1, 1, W( Welt, 0, n), W( Welt, 0, n%Q) );
}
/* 7) reduce global sum squares on the first tile (index 0, 0) */
for(n = 1; n < Q; n++) {
INSERT_TASK_dplssq(
options, ChamEltwise, 1, 1, W( Welt, 0, n), W( Welt, 0, 0) );
}
/* 8) deduce the global sum square from the pair (scl,ssq) -> sqrt(sum) = scl*sqrt(ssq) */
INSERT_TASK_dplssq2( options, 1, W( Welt, 0, 0) );
/* Finally compute Gram matrix coefficients inplace */
for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
INSERT_TASK_zgram(
options,
( m == n ) ? uplo : ChamUpperLower,
A->m, A->n, tempmm, tempnn,
W( Wcol, 0, m ), 2,
W( Wcol, 0, n ), 2,
W( Welt, 0, 0 ),
A( m, n ), ldam );
}
}
}
/**
*
*/
void chameleon_pzgram( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
CHAM_desc_t Wcol;
CHAM_desc_t Welt;
int workmt, worknt;
int m, n, tempmm, tempnn, ldw;
chamctxt = chameleon_context_self();
if ( sequence->status != CHAMELEON_SUCCESS ) {
return;
}
RUNTIME_options_init(&options, chamctxt, sequence, request);
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
RUNTIME_options_ws_alloc( &options, 1, 0 );
chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 2, A->nb, 2*A->nb,
2*workmt, A->n, 0, 0, 2*workmt, A->n, A->p, A->q,
NULL, NULL, NULL );
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 2, 1, 2,
2, worknt, 0, 0, 2, worknt, A->p, A->q,
NULL, NULL, NULL );
/* Initialize Wcol */
for(m = 0; m < Wcol.mt; m++) {
tempmm = m == Wcol.mt-1 ? Wcol.m-m*Wcol.mb : Wcol.mb;
ldw = Wcol.get_blkldd(&Wcol, m);
for(n = 0; n < Wcol.nt; n++) {
tempnn = n == Wcol.nt-1 ? Wcol.n-n*Wcol.nb : Wcol.nb;
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
-1., -1.,
W( &Wcol, m, n ), ldw );
}
}
/* Initialize Welt */
for(m = 0; m < Welt.mt; m++) {
tempmm = m == Welt.mt-1 ? Welt.m-m*Welt.mb : Welt.mb;
ldw = Welt.get_blkldd(&Welt, m);
for(n = 0; n < Welt.nt; n++) {
tempnn = n == Welt.nt-1 ? Welt.n-n*Welt.nb : Welt.nb;
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
-1., -1.,
W( &Welt, m, n ), ldw );
}
}
chameleon_pzgram_internal( uplo, A, &Wcol, &Welt, &options );
CHAMELEON_Desc_Flush( &Wcol, sequence );
CHAMELEON_Desc_Flush( &Welt, sequence );
CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy( &Wcol );
chameleon_desc_destroy( &Welt );
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, chamctxt);
}
/**
*
* @file zgram.c
*
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgram wrappers
*
* @version 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2019-04-10
* @precisions normal z -> s d c z
*
*/
#include "control/common.h"
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t
*
* CHAMELEON_zgram replace a general matrix by the Gram matrix inplace
*
* \f[
* d_{i.}^2 = (1/n) \sum_j d_{ij}^2` and :math:`d_{..}^2 = (1/n^2) \sum_{i,j} d_{ij}^2 \\
* A_{i,j} = -(1/2) (d_{ij}^2 - d_{i.}^2 - d_{.j}^2 + d_{..}^2)
* \f]
*
*******************************************************************************
*
* @param[in] N
* The number of lines and columns of the matrix A. N >= 0. When N = 0,
* the returned value is set to zero.
*
* @param[in] A
* The N-by-N matrix A.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
*
*******************************************************************************
*
* @sa CHAMELEON_zgram_Tile
* @sa CHAMELEON_zgram_Tile_Async
* @sa CHAMELEON_sgram
*
*/
int CHAMELEON_zgram( cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA )
{
int NB;
int status;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
CHAM_desc_t descAl, descAt;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zgram", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
/* Check input arguments */
if (N < 0) {
chameleon_error("CHAMELEON_zgram", "illegal value of N");
return -1;
}
if (LDA < chameleon_max(1, N)) {
chameleon_error("CHAMELEON_zgram", "illegal value of LDA");
return -3;
}
/* Quick return */
if (N == 0)
return CHAMELEON_SUCCESS;
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = chameleon_tune(CHAMELEON_FUNC_DGEMM, N, N, 0);
if (status != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zgram", "chameleon_tune() failed");
return status;
}
/* Set NT */
NB = CHAMELEON_NB;
chameleon_sequence_create( chamctxt, &sequence );
/* Submit the matrix conversion */
chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, uplo,
A, NB, NB, LDA, N, N, N, sequence, &request );
/* Call the tile interface */
CHAMELEON_zgram_Tile_Async( uplo, &descAt, sequence, &request );
/* Submit the matrix conversion back */
chameleon_ztile2lap( chamctxt, &descAl, &descAt,
ChamDescInout, uplo, sequence, &request );
chameleon_sequence_wait( chamctxt, sequence );
/* Cleanup the temporary data */
chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile
*
* CHAMELEON_zgram_Tile - Tile equivalent of CHAMELEON_zgram().
* Operates on matrices stored by tiles.
* All matrices are passed through descriptors.
* All dimensions are taken from the descriptors.
*
*******************************************************************************
*
* @param[in] A
* The N-by-N matrix A.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
*
*******************************************************************************
*
* @sa CHAMELEON_zgram
* @sa CHAMELEON_zgram_Tile_Async
* @sa CHAMELEON_sgram_Tile
*
*/
int CHAMELEON_zgram_Tile( cham_uplo_t uplo, CHAM_desc_t *A )
{
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zgram_Tile", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
chameleon_sequence_create( chamctxt, &sequence );
CHAMELEON_zgram_Tile_Async( uplo, A, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
chameleon_sequence_wait( chamctxt, sequence );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile_Async
*
* CHAMELEON_zgram_Tile_Async - Non-blocking equivalent of CHAMELEON_zgram_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa CHAMELEON_zgram
* @sa CHAMELEON_zgram_Tile
* @sa CHAMELEON_sgram_Tile_Async
*
*/
int CHAMELEON_zgram_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zgram_Tile", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
if (sequence == NULL) {
chameleon_fatal_error("CHAMELEON_zgram_Tile", "NULL sequence");
return CHAMELEON_ERR_UNALLOCATED;
}
if (request == NULL) {
chameleon_fatal_error("CHAMELEON_zgram_Tile", "NULL request");
return CHAMELEON_ERR_UNALLOCATED;
}
/* Check sequence status */
if (sequence->status == CHAMELEON_SUCCESS) {
request->status = CHAMELEON_SUCCESS;
}
else {
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
}
/* Check descriptors for correctness */
if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zgram_Tile", "invalid descriptor");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (A->nb != A->mb) {
chameleon_error("CHAMELEON_zgram_Tile", "only square tiles supported");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
/* Quick return */
if (chameleon_min(A->m, A->n) == 0) {
return CHAMELEON_SUCCESS;
}
chameleon_pzgram( uplo, A, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -115,7 +115,10 @@ void chameleon_pzungqr_param( int genD, int K, const libhqr_tree_t *qrtree,
CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
/**
* Gram function prototypes
*/
void chameleon_pzgram( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
/**
* LAPACK/Tile Descriptor accesses
......
......@@ -44,6 +44,7 @@ set(ZSRC
core_zgetrf.c
core_zgetrf_incpiv.c
core_zgetrf_nopiv.c
core_zgram.c
core_zhe2ge.c
core_zherfb.c
core_zhemm.c
......
......@@ -174,6 +174,24 @@ int CORE_zgessq( cham_store_t storev, int M, int N,
const CHAMELEON_Complex64_t *A, int LDA,
double *sclssq )
{
int i;
int K;
/* initialize pairs scale, sumsquare if not already done */
if ( storev == ChamColumnwise ) {
K = N;
} else if ( storev == ChamRowwise ) {
K = M;
} else {
K = 1;
}
for (i=0; i<2*K; i+=2) {
if ( ( sclssq[i] == -1. ) && ( sclssq[i+1] == -1. ) ) {
sclssq[i] = 1.;
sclssq[i+1] = 0.;
}
}
if (storev == ChamColumnwise) {
CORE_zgessq_col( M, N, A, LDA, sclssq );
}
......
/**
*
* @file core_zgram.c
*
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon core_zgram CPU kernel
*
* @version 0.9.2
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2019-04-10
* @precisions normal z -> s d c z
*
*/
#include "coreblas.h"
/**
*******************************************************************************
*
* @ingroup CORE_CHAMELEON_Complex64_t
*
* CORE_zgram computes the Gram matrix factors of A inplace
*
* \f[
* d_{i.}^2 = (1/n) \sum_j d_{ij}^2` and :math:`d_{..}^2 = (1/n^2) \sum_{i,j} d_{ij}^2 \\
* A_{i,j} = -(1/2) (d_{ij}^2 - d_{i.}^2 - d_{.j}^2 + d_{..}^2)
* \f]
*
*******************************************************************************
*
* @param[in] uplo
* Specifies the part of the matrix A to be computed.
* = ChamUpperLower: All the matrix A
* = ChamUpper: Upper triangular part
* = ChamLower: Lower triangular part
*
* @param[in] M
* The number of rows of the overall matrix. M >= 0.
*
* @param[in] N
* The number of columns of the overall matrix. N >= 0.
*
* @param[in] Mt
* The number of rows of the tile A. Mt >= 0.
*
* @param[in] Nt
* The number of columns of the tile A. Nt >= 0.
*
* @param[in] Di
* The 1-by-Mt tile containing the sum of squares by rows.
*
* @param[in] LDDI
* The leading dimension of the array Di.
*
* @param[in] Dj
* The 1-by-Nt tile containing the sum of squares by columns.
*
* @param[in] LDDJ
* The leading dimension of the array Dj.
*
* @param[in] D
* The sum of squares of all the matrix.
*
* @param[in,out] A
* On entry, the M-by-N tile A.
* On exit, updated by the application of L.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval <0 if INFO = -k, the k-th argument had an illegal value
*
*/
int CORE_zgram( cham_uplo_t uplo,
int M, int N,
int Mt, int Nt,
const double *Di, int LDDI,
const double *Dj, int LDDJ,
const double *D,
double *A, int LDA )
{
int i, j;
double coef = -0.5;
double di, dj, d;
/* Check input arguments */
if (uplo != ChamUpper && uplo != ChamLower && uplo != ChamUpperLower ) {
coreblas_error(1, "Illegal value of uplo");
return -1;
}
if (M < 0) {
coreblas_error(2, "Illegal value of M");
return -2;
}
if (N < 0) {
coreblas_error(3, "Illegal value of N");
return -3;
}
if (Mt < 0) {
coreblas_error(4, "Illegal value of Mt");
return -4;
}
if (Nt < 0) {
coreblas_error(5, "Illegal value of Nt");
return -5;
}
if ((LDA < chameleon_max(1,Mt)) && (Mt > 0)) {
coreblas_error(12, "Illegal value of LDA");
return -12;
}
/* Quick return */
if ((Mt == 0) || (Nt == 0))
return CHAMELEON_SUCCESS;
/* overall mean of squares */
d = D[0]*D[0]/(M*N);
for(j = 0; j < Nt; j++) {
/* mean of squares of the column */
dj = Dj[j*LDDJ]*Dj[j*LDDJ]/M;
int mmin = ( uplo == ChamLower ) ? j : 0;
int mmax = ( uplo == ChamUpper) ? chameleon_min(j+1, Mt) : Mt;
for(i = mmin; i < mmax; i++) {
/* mean of squares of the row */
di = Di[i*LDDI]*Di[i*LDDI]/N;
/* compute Gram factor */
A[j*LDA+i] = coef*(A[j*LDA+i]*A[j*LDA+i] - di - dj + d);
}
}
return CHAMELEON_SUCCESS;
}
......@@ -41,7 +41,7 @@ CORE_zplssq_col( int M, int N,
for(j=0; j<N; j++) {
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
sumsq_update_2( tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
}
......@@ -68,7 +68,7 @@ CORE_zplssq_row( int M, int N,
tmpScaleOut = sclssqout;
tmpSumsqOut = sclssqout+1;
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
sumsq_update_2( tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
tmpScaleOut+=2;
......@@ -95,7 +95,7 @@ CORE_zplssq_elt( int M, int N,
for(j=0; j<N; j++) {
for(i=0; i<M; i++) {
sumsq_update_2( 1., tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
sumsq_update_2( tmpScaleIn, tmpSumsqIn, tmpScaleOut, tmpSumsqOut );
tmpScaleIn+=2;
tmpSumsqIn+=2;
}
......@@ -147,6 +147,24 @@ int CORE_zplssq( cham_store_t storev, int M, int N,
double *sclssqin,
double *sclssqout )
{
int i;
int K;
/* initialize pairs scale, sumsquare if not already done */
if ( storev == ChamColumnwise ) {
K = N;
} else if ( storev == ChamRowwise ) {
K = M;
} else {
K = 1;
}
for (i=0; i<2*K; i+=2) {
if ( ( sclssqout[i] == -1. ) && ( sclssqout[i+1] == -1. ) ) {
sclssqout[i] = 1.;
sclssqout[i+1] = 0.;
}
}
if (storev == ChamColumnwise) {
CORE_zplssq_col( M, N, sclssqin, sclssqout );
}
......@@ -173,7 +191,7 @@ int CORE_zplssq( cham_store_t storev, int M, int N,
* @param[in,out] sclssq
* The 2*N matrix on which to compute scl*sqrt(ssq)
* On entry contains all couple (scl,ssq) in (sclssq[i],sclssq[i+1])
* On exist returns sclssq[i]=sclssq[i]*sqrt(sclssq[i+1])
* On exist returns scl*sqrt(ssq) stored in sclssq[2*i], i = 0, ..., N-1
* so that the result is stored in the first line.
*
*******************************************************************************
......@@ -186,8 +204,8 @@ int CORE_zplssq2( int N,
double *sclssq )
{
int i;
for (i=0; i<N; i+=2) {
sclssq[i] *= sqrt ( sclssq[i+1] );
for (i=0; i<2*N; i+=2) {
sclssq[i] = sclssq[i]*sqrt(sclssq[i+1]);
}
return CHAMELEON_SUCCESS;