/**
 *
 * @file zgeadd.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zgeadd wrappers
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
 * @author Mathieu Faverge
 * @date 2011-11-03
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"

24 25
/**
 ********************************************************************************
26
 *
27
 * @ingroup CHAMELEON_Complex64_t
28
 *
29
 *  CHAMELEON_zgeadd - Performs a matrix addition similarly to the pzgeadd()
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
 *  function from the PBLAS library:
 *
 *    \f[ C = \alpha op( A ) + \beta B \f],
 *
 *  where op( X ) is one of
 *
 *    op( X ) = X  or op( X ) = X' or op( X ) = conjg( X' )
 *
 *  alpha and beta are scalars, and A, and B are matrices, with op( A ) and B
 *  two m by n matrices.
 *
 *******************************************************************************
 *
 * @param[in] trans
 *          Specifies whether the matrix A is transposed, not transposed or
 *          conjugate transposed:
46 47 48
 *          = ChamNoTrans:   A is not transposed;
 *          = ChamTrans:     A is transposed;
 *          = ChamConjTrans: A is conjugate transposed.
49 50 51 52 53 54 55 56 57 58 59
 *
 * @param[in] M
 *          M specifies the number of rows of the matrix op( A ) and of the matrix B. M >= 0.
 *
 * @param[in] N
 *          N specifies the number of columns of the matrix op( A ) and of the matrix B. N >= 0.
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha
 *
 * @param[in] A
60
 *          A is a LDA-by-ka matrix, where ka is N when trans = ChamNoTrans,
61 62 63 64
 *          and is M otherwise.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,K), where K is M
65
 *          when trans = ChamNoTrans, and is N when otherwise.
66 67 68 69 70 71 72 73 74 75 76 77
 *
 * @param[in] beta
 *          beta specifies the scalar beta
 *
 * @param[in,out] B
 *          B is a LDB-by-N matrix.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,M).
 *
 *******************************************************************************
 *
78
 * @retval CHAMELEON_SUCCESS successful exit
79 80 81
 *
 *******************************************************************************
 *
82 83 84 85
 * @sa CHAMELEON_zgeadd_Tile
 * @sa CHAMELEON_cgeadd
 * @sa CHAMELEON_dgeadd
 * @sa CHAMELEON_sgeadd
86
 *
87
 */
88 89 90
int CHAMELEON_zgeadd( cham_trans_t trans, int M, int N,
                  CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA,
                  CHAMELEON_Complex64_t beta,  CHAMELEON_Complex64_t *B, int LDB )
91 92 93 94
{
    int NB;
    int Am, An;
    int status;
95 96
    CHAM_desc_t descAl, descAt;
    CHAM_desc_t descBl, descBt;
Mathieu Faverge's avatar
Mathieu Faverge committed
97
    CHAM_context_t *chamctxt;
98 99
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
100

Mathieu Faverge's avatar
Mathieu Faverge committed
101 102 103
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgeadd", "CHAMELEON not initialized");
104
        return CHAMELEON_ERR_NOT_INITIALIZED;
105 106 107
    }

    /* Check input arguments */
108
    if ((trans < ChamNoTrans) || (trans > ChamConjTrans)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
109
        chameleon_error("CHAMELEON_zgeadd", "illegal value of trans");
110 111
        return -1;
    }
112
    if ( trans == ChamNoTrans ) {
113 114 115 116 117
        Am = M; An = N;
    } else {
        Am = N; An = M;
    }
    if (M < 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
118
        chameleon_error("CHAMELEON_zgeadd", "illegal value of M");
119 120 121
        return -2;
    }
    if (N < 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
122
        chameleon_error("CHAMELEON_zgeadd", "illegal value of N");
123 124
        return -3;
    }
125
    if (LDA < chameleon_max(1, Am)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
126
        chameleon_error("CHAMELEON_zgeadd", "illegal value of LDA");
127 128
        return -6;
    }
129
    if (LDB < chameleon_max(1, M)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
130
        chameleon_error("CHAMELEON_zgeadd", "illegal value of LDB");
131 132 133 134 135
        return -9;
    }

    /* Quick return */
    if (M == 0 || N == 0 ||
136 137
        ((alpha == (CHAMELEON_Complex64_t)0.0) && beta == (CHAMELEON_Complex64_t)1.0))
        return CHAMELEON_SUCCESS;
138 139

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
Mathieu Faverge's avatar
Mathieu Faverge committed
140
    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
141
    if (status != CHAMELEON_SUCCESS) {
Mathieu Faverge's avatar
Mathieu Faverge committed
142
        chameleon_error("CHAMELEON_zgeadd", "chameleon_tune() failed");
143 144 145 146
        return status;
    }

    /* Set MT & NT & KT */
147
    NB = CHAMELEON_NB;
148

Mathieu Faverge's avatar
Mathieu Faverge committed
149
    chameleon_sequence_create( chamctxt, &sequence );
150

151
    /* Submit the matrix conversion */
Mathieu Faverge's avatar
Mathieu Faverge committed
152
    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
153
                     A, NB, NB, LDA, An, Am, An, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
154
    chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
155
                     B, NB, NB, LDB, N, M, N, sequence, &request );
156 157

    /* Call the tile interface */
158
    CHAMELEON_zgeadd_Tile_Async( trans, alpha, &descAt, beta, &descBt, sequence, &request );
159

Mathieu Faverge's avatar
Mathieu Faverge committed
160
    /* Submit the matrix conversion back */
Mathieu Faverge's avatar
Mathieu Faverge committed
161
    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
162
                     ChamDescInput, ChamUpperLower, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
163
    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
164
                     ChamDescInout, ChamUpperLower, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
165

Mathieu Faverge's avatar
Mathieu Faverge committed
166
    chameleon_sequence_wait( chamctxt, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
167

Mathieu Faverge's avatar
Mathieu Faverge committed
168
    /* Cleanup the temporary data */
Mathieu Faverge's avatar
Mathieu Faverge committed
169 170
    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
171 172

    status = sequence->status;
Mathieu Faverge's avatar
Mathieu Faverge committed
173
    chameleon_sequence_destroy( chamctxt, sequence );
174 175 176
    return status;
}

177 178
/**
 ********************************************************************************
179
 *
180
 * @ingroup CHAMELEON_Complex64_t_Tile
181
 *
182
 *  CHAMELEON_zgeadd_Tile - Performs a matrix addition similarly to the pzgeadd()
183
 *  function from the PBLAS library.
184
 *  Tile equivalent of CHAMELEON_zgeadd().
185 186 187 188 189 190 191 192 193
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] trans
 *          Specifies whether the matrix A is transposed, not transposed or
 *          conjugate transposed:
194 195 196
 *          = ChamNoTrans:   A is not transposed;
 *          = ChamTrans:     A is transposed;
 *          = ChamConjTrans: A is conjugate transposed.
197 198 199 200 201
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha
 *
 * @param[in] A
202
 *          A is a LDA-by-ka matrix, where ka is N when trans = ChamNoTrans,
203 204 205 206 207 208 209 210 211 212
 *          and is M otherwise.
 *
 * @param[in] beta
 *          beta specifies the scalar beta
 *
 * @param[in,out] B
 *          B is a LDB-by-N matrix.
 *
 *******************************************************************************
 *
213
 * @retval CHAMELEON_SUCCESS successful exit
214 215 216
 *
 *******************************************************************************
 *
217 218 219 220 221
 * @sa CHAMELEON_zgeadd
 * @sa CHAMELEON_zgeadd_Tile_Async
 * @sa CHAMELEON_cgeadd_Tile
 * @sa CHAMELEON_dgeadd_Tile
 * @sa CHAMELEON_sgeadd_Tile
222
 *
223
 */
224 225 226
int CHAMELEON_zgeadd_Tile( cham_trans_t trans,
                       CHAMELEON_Complex64_t alpha, CHAM_desc_t *A,
                       CHAMELEON_Complex64_t beta,  CHAM_desc_t *B )
227
{
Mathieu Faverge's avatar
Mathieu Faverge committed
228
    CHAM_context_t *chamctxt;
229 230
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
231 232
    int status;

Mathieu Faverge's avatar
Mathieu Faverge committed
233 234 235
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgeadd_Tile", "CHAMELEON not initialized");
236
        return CHAMELEON_ERR_NOT_INITIALIZED;
237
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
238
    chameleon_sequence_create( chamctxt, &sequence );
239

240
    CHAMELEON_zgeadd_Tile_Async( trans, alpha, A, beta, B, sequence, &request );
241

242 243
    CHAMELEON_Desc_Flush( A, sequence );
    CHAMELEON_Desc_Flush( B, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
244

Mathieu Faverge's avatar
Mathieu Faverge committed
245
    chameleon_sequence_wait( chamctxt, sequence );
246
    status = sequence->status;
Mathieu Faverge's avatar
Mathieu Faverge committed
247
    chameleon_sequence_destroy( chamctxt, sequence );
248 249 250
    return status;
}

251 252
/**
 ********************************************************************************
253
 *
254
 * @ingroup CHAMELEON_Complex64_t_Tile_Async
255
 *
256
 *  CHAMELEON_zgeadd_Tile_Async - Performs a matrix addition similarly to the
257
 *  pzgeadd() function from the PBLAS library.
258
 *  Non-blocking equivalent of CHAMELEON_zgeadd_Tile().
259 260 261 262 263 264 265 266 267 268 269 270 271 272
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
273 274 275 276 277
 * @sa CHAMELEON_zgeadd
 * @sa CHAMELEON_zgeadd_Tile
 * @sa CHAMELEON_cgeadd_Tile_Async
 * @sa CHAMELEON_dgeadd_Tile_Async
 * @sa CHAMELEON_sgeadd_Tile_Async
278
 *
279
 */
280 281 282 283
int CHAMELEON_zgeadd_Tile_Async( cham_trans_t trans,
                             CHAMELEON_Complex64_t alpha, CHAM_desc_t *A,
                             CHAMELEON_Complex64_t beta,  CHAM_desc_t *B,
                             RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
284
{
Mathieu Faverge's avatar
Mathieu Faverge committed
285
    CHAM_context_t *chamctxt;
286 287 288
    int M, N;
    int Am, An, Ai, Aj, Amb, Anb;

Mathieu Faverge's avatar
Mathieu Faverge committed
289 290 291
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgeadd_Tile_Async", "CHAMELEON not initialized");
292
        return CHAMELEON_ERR_NOT_INITIALIZED;
293 294
    }
    if (sequence == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
295
        chameleon_fatal_error("CHAMELEON_zgeadd_Tile_Async", "NULL sequence");
296
        return CHAMELEON_ERR_UNALLOCATED;
297 298
    }
    if (request == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
299
        chameleon_fatal_error("CHAMELEON_zgeadd_Tile_Async", "NULL request");
300
        return CHAMELEON_ERR_UNALLOCATED;
301 302
    }
    /* Check sequence status */
303 304
    if (sequence->status == CHAMELEON_SUCCESS) {
        request->status = CHAMELEON_SUCCESS;
Mathieu Faverge's avatar
Mathieu Faverge committed
305 306
    }
    else {
Mathieu Faverge's avatar
Mathieu Faverge committed
307
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
Mathieu Faverge's avatar
Mathieu Faverge committed
308
    }
309 310

    /* Check descriptors for correctness */
Mathieu Faverge's avatar
Mathieu Faverge committed
311 312 313
    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "invalid first descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
314
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
315 316 317
    if (chameleon_desc_check(B) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "invalid second descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
318 319
    }
    /* Check input arguments */
320
    if ((trans < ChamNoTrans) || (trans > ChamConjTrans)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
321 322
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "illegal value of trans");
        return chameleon_request_fail(sequence, request, -1);
323 324
    }

325
    if ( trans == ChamNoTrans ) {
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
        Am  = A->m;
        An  = A->n;
        Amb = A->mb;
        Anb = A->nb;
        Ai  = A->i;
        Aj  = A->j;
    } else {
        Am  = A->n;
        An  = A->m;
        Amb = A->nb;
        Anb = A->mb;
        Ai  = A->j;
        Aj  = A->i;
    }

    if ( (Amb != B->mb) || (Anb != B->nb) ) {
Mathieu Faverge's avatar
Mathieu Faverge committed
342 343
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "tile sizes have to match");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
344 345
    }
    if ( (Am != B->m) || (An != B->n) ) {
Mathieu Faverge's avatar
Mathieu Faverge committed
346 347
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "sizes of matrices have to match");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
348 349
    }
    if ( (Ai != B->i) || (Aj != B->j) ) {
Mathieu Faverge's avatar
Mathieu Faverge committed
350 351
        chameleon_error("CHAMELEON_zgeadd_Tile_Async", "start indexes have to match");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
352 353 354 355 356 357
    }

    M = B->m;
    N = B->n;

    /* Quick return */
358
    if ( (M == 0) || (N == 0) ||
359
         ((alpha == (CHAMELEON_Complex64_t)0.0) && (beta == (CHAMELEON_Complex64_t)1.0)) )
360
    {
361
        return CHAMELEON_SUCCESS;
362
    }
363

Mathieu Faverge's avatar
Mathieu Faverge committed
364
    chameleon_pztradd( ChamUpperLower, trans, alpha, A, beta, B, sequence, request );
365

366
    return CHAMELEON_SUCCESS;
367
}