core_zgessq.c 5.4 KB
Newer Older
1
/**
2 3
 *
 * @file core_zgessq.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
PRUVOST Florent's avatar
Doc  
PRUVOST Florent committed
7
 * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8
 *                      Univ. Bordeaux. All rights reserved.
9
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
10
 ***
11
 *
12
 * @brief Chameleon core_zgessq CPU kernel
13
 *
PRUVOST Florent's avatar
Doc  
PRUVOST Florent committed
14
 * @version 0.9.2
15
 * @comment This file has been automatically generated
PRUVOST Florent's avatar
Doc  
PRUVOST Florent committed
16
 *          from Plasma 2.6.0 for CHAMELEON 0.9.2
17
 * @author Mathieu Faverge
18
 * @author Florent Pruvost
PRUVOST Florent's avatar
Doc  
PRUVOST Florent committed
19
 * @date 2014-11-16
20 21
 * @precisions normal z -> c d s
 *
22
 */
23
#include <math.h>
24
#include "coreblas/lapacke.h"
25
#include "coreblas/sumsq_update.h"
26
#include "coreblas.h"
27

28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
/**
 * @brief Subcase storev == ChamColumnwise of CORE_zgessq()
 */
static inline int
CORE_zgessq_col( int M, int N,
                 const CHAMELEON_Complex64_t *A, int LDA,
                 double *sclssq )
{
    int i, j;
    double tmp;
    double *ptr, *tmpScale, *tmpSumsq;

    for(j=0; j<N; j++) {
        ptr = (double*) ( A + j * LDA );
        tmpScale = sclssq+2*j;
        tmpSumsq = sclssq+2*j+1;
        for(i=0; i<M; i++, ptr++) {
            tmp = fabs(*ptr);
            sumsq_update( 1., tmpScale, tmpSumsq, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
            ptr++;
            tmp = fabs(*ptr);
            sumsq_update( 1., tmpScale, tmpSumsq, &tmp );
#endif
        }
53
    }
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
    return CHAMELEON_SUCCESS;
}
/**
 * @brief Subcase storev == ChamRowwise of CORE_zgessq()
 */
static inline int
CORE_zgessq_row( int M, int N,
                 const CHAMELEON_Complex64_t *A, int LDA,
                 double *sclssq )
{
    int i, j;
    double tmp;
    double *ptr, *tmpScale, *tmpSumsq;

    for(j=0; j<N; j++) {
        ptr = (double*) ( A + j * LDA );
        tmpScale = sclssq;
        tmpSumsq = sclssq+1;
        for(i=0; i<M; i++, ptr++, tmpScale+=2, tmpSumsq+=2) {
            tmp = fabs(*ptr);
            sumsq_update( 1., tmpScale, tmpSumsq, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
            ptr++;
            tmp = fabs(*ptr);
            sumsq_update( 1., tmpScale, tmpSumsq, &tmp );
#endif
        }
    }
    return CHAMELEON_SUCCESS;
}
/**
 * @brief Subcase storev == ChamEltwise of CORE_zgessq()
 */
static inline int
CORE_zgessq_elt( int M, int N,
                 const CHAMELEON_Complex64_t *A, int LDA,
                 double *sclssq )
{
    int i, j;
    double tmp;
    double *ptr;

    for(j=0; j<N; j++) {
        ptr = (double*) ( A + j * LDA );
        for(i=0; i<M; i++, ptr++) {
            tmp = fabs(*ptr);
            sumsq_update( 1., sclssq, sclssq+1, &tmp );
#if defined(PRECISION_z) || defined(PRECISION_c)
            ptr++;
            tmp = fabs(*ptr);
            sumsq_update( 1., sclssq, sclssq+1, &tmp );
#endif
        }
    }
    return CHAMELEON_SUCCESS;
}
110

111
/**
112
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
113
 * @ingroup CORE_CHAMELEON_Complex64_t
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
 *
 *  CORE_zgessq returns the values scl and ssq such that
 *
 *    ( scl**2 )*ssq = sum( A( i, j )**2 ) + ( scale**2 )*sumsq,
 *                     i,j
 *
 * with i from 0 to M-1 and j form 0 to N-1. The value of sumsq is
 * assumed to be at least unity and the value of ssq will then satisfy
 *
 *    1.0 .le. ssq .le. ( sumsq + 2*m*n ).
 *
 * scale is assumed to be non-negative and scl returns the value
 *
 *    scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ),
 *           i
 *
 * scale and sumsq must be supplied in SCALE and SUMSQ respectively.
 * SCALE and SUMSQ are overwritten by scl and ssq respectively.
 *
 * The routine makes only one pass through the tile A.
 * See also LAPACK zlassq.f
 *
 *******************************************************************************
 *
138 139 140 141 142 143
 * @param[in] storev
 *          Specifies whether the sums are made per column or row.
 *          = ChamColumnwise: Computes the sum of squares on each column
 *          = ChamRowwise:    Computes the sum of squares on each row
 *          = ChamEltwise:    Computes the sum of squares on all the matrix
 *
144 145 146 147 148 149 150 151 152 153 154 155
 *  @param[in] M
 *          The number of rows in the tile A.
 *
 *  @param[in] N
 *          The number of columns in the tile A.
 *
 *  @param[in] A
 *          The M-by-N matrix on which to compute the norm.
 *
 *  @param[in] LDA
 *          The leading dimension of the tile A. LDA >= max(1,M).
 *
156 157 158 159 160 161 162 163 164 165
 *  @param[in,out] sclssq
 *          Dimension:  (2,K)
 *          if storev == ChamColumnwise, K = N
 *          if storev == ChamRowwise,    K = M
 *          if storev == ChamEltwise,    K = 1
 *          On entry, sclssq contains K couples (sclssq[2*i], sclssq[2*i+1])
 *          which corresponds to (scale, sumsq) in the equation below
 *          ( scl**2 )*ssq = sum( A( i, j )**2 ) + ( scale**2 )*sumsq,
 *          respectively for the columns, the rows and the full matrix
 *          On exit, each couple is overwritten with the final result (scl, ssq).
166 167 168
 *
 *******************************************************************************
 *
169 170
 * @retval CHAMELEON_SUCCESS successful exit
 * @retval -k, the k-th argument had an illegal value
171 172
 *
 */
173 174 175
int CORE_zgessq( cham_store_t storev, int M, int N,
                 const CHAMELEON_Complex64_t *A, int LDA,
                 double *sclssq )
176
{
177 178 179 180 181 182 183 184
    if (storev == ChamColumnwise) {
        CORE_zgessq_col( M, N, A, LDA, sclssq );
    }
    else if (storev == ChamRowwise) {
        CORE_zgessq_row( M, N, A, LDA, sclssq );
    }
    else {
        CORE_zgessq_elt( M, N, A, LDA, sclssq );
185
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
186
    return CHAMELEON_SUCCESS;
187
}