/**
 *
 * @file zgeadd.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zgeadd wrappers
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
 * @author Mathieu Faverge
 * @date 2011-11-03
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"

24 25
/**
 ********************************************************************************
26
 *
27
 * @ingroup CHAMELEON_Complex64_t
28
 *
29
 *  CHAMELEON_zgeadd - Performs a matrix addition similarly to the pzgeadd()
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
 *  function from the PBLAS library:
 *
 *    \f[ C = \alpha op( A ) + \beta B \f],
 *
 *  where op( X ) is one of
 *
 *    op( X ) = X  or op( X ) = X' or op( X ) = conjg( X' )
 *
 *  alpha and beta are scalars, and A, and B are matrices, with op( A ) and B
 *  two m by n matrices.
 *
 *******************************************************************************
 *
 * @param[in] trans
 *          Specifies whether the matrix A is transposed, not transposed or
 *          conjugate transposed:
46 47 48
 *          = ChamNoTrans:   A is not transposed;
 *          = ChamTrans:     A is transposed;
 *          = ChamConjTrans: A is conjugate transposed.
49 50 51 52 53 54 55 56 57 58 59
 *
 * @param[in] M
 *          M specifies the number of rows of the matrix op( A ) and of the matrix B. M >= 0.
 *
 * @param[in] N
 *          N specifies the number of columns of the matrix op( A ) and of the matrix B. N >= 0.
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha
 *
 * @param[in] A
60
 *          A is a LDA-by-ka matrix, where ka is N when trans = ChamNoTrans,
61 62 63 64
 *          and is M otherwise.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,K), where K is M
65
 *          when trans = ChamNoTrans, and is N when otherwise.
66 67 68 69 70 71 72 73 74 75 76 77 78
 *
 * @param[in] beta
 *          beta specifies the scalar beta
 *
 * @param[in,out] B
 *          B is a LDB-by-N matrix.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,M).
 *
 *******************************************************************************
 *
 * @return
79
 *          \retval CHAMELEON_SUCCESS successful exit
80 81 82
 *
 *******************************************************************************
 *
83 84 85 86
 * @sa CHAMELEON_zgeadd_Tile
 * @sa CHAMELEON_cgeadd
 * @sa CHAMELEON_dgeadd
 * @sa CHAMELEON_sgeadd
87
 *
88
 */
89 90 91
int CHAMELEON_zgeadd( cham_trans_t trans, int M, int N,
                  CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA,
                  CHAMELEON_Complex64_t beta,  CHAMELEON_Complex64_t *B, int LDB )
92 93 94 95
{
    int NB;
    int Am, An;
    int status;
96 97
    CHAM_desc_t descAl, descAt;
    CHAM_desc_t descBl, descBt;
Mathieu Faverge's avatar
Mathieu Faverge committed
98
    CHAM_context_t *chamctxt;
99 100
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
101

Mathieu Faverge's avatar
Mathieu Faverge committed
102 103 104
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgeadd", "CHAMELEON not initialized");
105
        return CHAMELEON_ERR_NOT_INITIALIZED;
106 107 108
    }

    /* Check input arguments */
109
    if ((trans < ChamNoTrans) || (trans > ChamConjTrans)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
110
        chameleon_error("CHAMELEON_zgeadd", "illegal value of trans");
111 112
        return -1;
    }
113
    if ( trans == ChamNoTrans ) {
114 115 116 117 118
        Am = M; An = N;
    } else {
        Am = N; An = M;
    }
    if (M < 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
119
        chameleon_error("CHAMELEON_zgeadd", "illegal value of M");
120 121 122
        return -2;
    }
    if (N < 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
123
        chameleon_error("CHAMELEON_zgeadd", "illegal value of N");
124 125
        return -3;
    }
126
    if (LDA < chameleon_max(1, Am)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
127
        chameleon_error("CHAMELEON_zgeadd", "illegal value of LDA");
128 129
        return -6;
    }
130
    if (LDB < chameleon_max(1, M)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
131
        chameleon_error("CHAMELEON_zgeadd", "illegal value of LDB");
132 133 134 135 136
        return -9;
    }

    /* Quick return */
    if (M == 0 || N == 0 ||
137 138
        ((alpha == (CHAMELEON_Complex64_t)0.0) && beta == (CHAMELEON_Complex64_t)1.0))
        return CHAMELEON_SUCCESS;
139 140

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
Mathieu Faverge's avatar
Mathieu Faverge committed
141
    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
142
    if (status != CHAMELEON_SUCCESS) {
Mathieu Faverge's avatar
Mathieu Faverge committed
143
        chameleon_error("CHAMELEON_zgeadd", "chameleon_tune() failed");
144 145 146 147
        return status;
    }

    /* Set MT & NT & KT */
148
    NB = CHAMELEON_NB;
149

Mathieu Faverge's avatar
Mathieu Faverge committed
150
    chameleon_sequence_create( chamctxt, &sequence );
151

152
    /* Submit the matrix conversion */
Mathieu Faverge's avatar
Mathieu Faverge committed
153
    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
154
                     A, NB, NB, LDA, An, Am, An, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
155
    chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
156
                     B, NB, NB, LDB, N, M, N, sequence, &request );
157 158

    /* Call the tile interface */
159
    CHAMELEON_zgeadd_Tile_Async( trans, alpha, &descAt, beta, &descBt, sequence, &request );
160

Mathieu Faverge's avatar
Mathieu Faverge committed
161
    /* Submit the matrix conversion back */
Mathieu Faverge's avatar
Mathieu Faverge committed
162
    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
163
                     ChamDescInput, ChamUpperLower, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
164
    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
165
                     ChamDescInout, ChamUpperLower, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
166

Mathieu Faverge's avatar
Mathieu Faverge committed
167
    chameleon_sequence_wait( chamctxt, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
168

Mathieu Faverge's avatar
Mathieu Faverge committed
169
    /* Cleanup the temporary data */
Mathieu Faverge's avatar
Mathieu Faverge committed
170 171
    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
172 173

    status = sequence->status;
Mathieu Faverge's avatar
Mathieu Faverge committed
174
    chameleon_sequence_destroy( chamctxt, sequence );
175 176 177
    return status;
}

178 179
/**
 ********************************************************************************
180
 *
181
 * @ingroup CHAMELEON_Complex64_t_Tile
182
 *
183
 *  CHAMELEON_zgeadd_Tile - Performs a matrix addition similarly to the pzgeadd()
184
 *  function from the PBLAS library.
185
 *  Tile equivalent of CHAMELEON_zgeadd().
186 187 188 189 190 191 192 193 194
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] trans
 *          Specifies whether the matrix A is transposed, not transposed or
 *          conjugate transposed:
195 196 197
 *          = ChamNoTrans:   A is not transposed;
 *          = ChamTrans:     A is transposed;
 *          = ChamConjTrans: A is conjugate transposed.
198 199 200 201 202
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha
 *
 * @param[in] A
203
 *          A is a LDA-by-ka matrix, where ka is N when trans = ChamNoTrans,
204 205 206 207 208 209 210 211 212 213 214
 *          and is M otherwise.
 *
 * @param[in] beta
 *          beta specifies the scalar beta
 *
 * @param[in,out] B
 *          B is a LDB-by-N matrix.
 *
 *******************************************************************************
 *
 * @return
215
 *          \retval CHAMELEON_SUCCESS successful exit
216 217 218
 *
 *******************************************************************************
 *
219 220 221 222 223
 * @sa CHAMELEON_zgeadd
 * @sa CHAMELEON_zgeadd_Tile_Async
 * @sa CHAMELEON_cgeadd_Tile
 * @sa CHAMELEON_dgeadd_Tile
 * @sa CHAMELEON_sgeadd_Tile
224
 *
225
 */
226 227 228
int CHAMELEON_zgeadd_Tile( cham_trans_t trans,
                       CHAMELEON_Complex64_t alpha, CHAM_desc_t *A,
                       CHAMELEON_Complex64_t beta,  CHAM_desc_t *B )
229
{
Mathieu Faverge's avatar
Mathieu Faverge committed
230
    CHAM_context_t *chamctxt;
231 232
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
233 234
    int status;

Mathieu Faverge's avatar
Mathieu Faverge committed
235 236 237
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgeadd_Tile", "CHAMELEON not initialized");
238
        return CHAMELEON_ERR_NOT_INITIALIZED;
239
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
240
    chameleon_sequence_create( chamctxt, &sequence );
241

242
    CHAMELEON_zgeadd_Tile_Async( trans, alpha, A, beta, B, sequence, &request );
243

244 245
    CHAMELEON_Desc_Flush( A, sequence );
    CHAMELEON_Desc_Flush( B, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
246

Mathieu Faverge's avatar
Mathieu Faverge committed
247
    chameleon_sequence_wait( chamctxt, sequence );
248
    status = sequence->status;
Mathieu Faverge's avatar
Mathieu Faverge committed
249
    chameleon_sequence_destroy( chamctxt, sequence );
250 251 252
    return status;
}

253 254
/**
 ********************************************************************************
255
 *
256
 * @ingroup CHAMELEON_Complex64_t_Tile_Async
257
 *
258
 *  CHAMELEON_zgeadd_Tile_Async - Performs a matrix addition similarly to the
259
 *  pzgeadd() function from the PBLAS library.
260
 *  Non-blocking equivalent of CHAMELEON_zgeadd_Tile().
261 262 263 264 265 266 267 268 269 270 271 272 273 274
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
275 276 277 278 279
 * @sa CHAMELEON_zgeadd
 * @sa CHAMELEON_zgeadd_Tile
 * @sa CHAMELEON_cgeadd_Tile_Async
 * @sa CHAMELEON_dgeadd_Tile_Async
 * @sa CHAMELEON_sgeadd_Tile_Async
280
 *
281
 */
282 283 284 285
int CHAMELEON_zgeadd_Tile_Async( cham_trans_t trans,
                             CHAMELEON_Complex64_t alpha, CHAM_desc_t *A,
                             CHAMELEON_Complex64_t beta,  CHAM_desc_t *B,
                             RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
286
{
Mathieu Faverge's avatar
Mathieu Faverge committed
287
    CHAM_context_t *chamctxt;
288 289 290
    int M, N;
    int Am, An, Ai, Aj, Amb, Anb;

Mathieu Faverge's avatar
Mathieu Faverge committed
291 292 293
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgeadd_Tile_Async", "CHAMELEON not initialized");
294
        return CHAMELEON_ERR_NOT_INITIALIZED;
295 296
    }
    if (sequence == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
297
        chameleon_fatal_error("CHAMELEON_zgeadd_Tile_Async", "NULL sequence");
298
        return CHAMELEON_ERR_UNALLOCATED;
299 300
    }
    if (request == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
301
        chameleon_fatal_error("CHAMELEON_zgeadd_Tile_Async", "NULL request");
302
        return CHAMELEON_ERR_UNALLOCATED;
303 304
    }
    /* Check sequence status */
305 306
    if (sequence->status == CHAMELEON_SUCCESS) {
        request->status = CHAMELEON_SUCCESS;
Mathieu Faverge's avatar
Mathieu Faverge committed
307 308
    }
    else {
Mathieu Faverge's avatar
Mathieu Faverge committed
309
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
Mathieu Faverge's avatar
Mathieu Faverge committed
310
    }
311 312

    /* Check descriptors for correctness */
Mathieu Faverge's avatar
Mathieu Faverge committed
313 314 315
    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "invalid first descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
316
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
317 318 319
    if (chameleon_desc_check(B) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "invalid second descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
320 321
    }
    /* Check input arguments */
322
    if ((trans < ChamNoTrans) || (trans > ChamConjTrans)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
323 324
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "illegal value of trans");
        return chameleon_request_fail(sequence, request, -1);
325 326
    }

327
    if ( trans == ChamNoTrans ) {
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
        Am  = A->m;
        An  = A->n;
        Amb = A->mb;
        Anb = A->nb;
        Ai  = A->i;
        Aj  = A->j;
    } else {
        Am  = A->n;
        An  = A->m;
        Amb = A->nb;
        Anb = A->mb;
        Ai  = A->j;
        Aj  = A->i;
    }

    if ( (Amb != B->mb) || (Anb != B->nb) ) {
Mathieu Faverge's avatar
Mathieu Faverge committed
344 345
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "tile sizes have to match");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
346 347
    }
    if ( (Am != B->m) || (An != B->n) ) {
Mathieu Faverge's avatar
Mathieu Faverge committed
348 349
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "sizes of matrices have to match");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
350 351
    }
    if ( (Ai != B->i) || (Aj != B->j) ) {
Mathieu Faverge's avatar
Mathieu Faverge committed
352 353
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "start indexes have to match");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
354 355 356 357 358 359
    }

    M = B->m;
    N = B->n;

    /* Quick return */
360
    if ( (M == 0) || (N == 0) ||
361
         ((alpha == (CHAMELEON_Complex64_t)0.0) && (beta == (CHAMELEON_Complex64_t)1.0)) )
362
    {
363
        return CHAMELEON_SUCCESS;
364
    }
365

Mathieu Faverge's avatar
Mathieu Faverge committed
366
    chameleon_pztradd( ChamUpperLower, trans, alpha, A, beta, B, sequence, request );
367

368
    return CHAMELEON_SUCCESS;
369
}