Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 74b94900 authored by Guillaume Sylvand's avatar Guillaume Sylvand
Browse files

Add new function Morse_zbuild() to fill a matrix with a user-defined function

The new codelet is added for all 3 runtimes but only tested with starpu ;-)
An example test7 is added : it is a copy of test6 that uses the new function to build the matrices
parent 9dd3d707
No related branches found
No related tags found
No related merge requests found
Showing
with 1088 additions and 2 deletions
......@@ -203,7 +203,12 @@ set(ZSRC
#pzpack.c
pztile.c
ztile.c
)
##################
# BUILD
##################
zbuild.c
pzbuild.c
)
precisions_rules_py(CHAMELEON_SRCS_GENERATED "${ZSRC}"
PRECISIONS "${CHAMELEON_PRECISION}")
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file pzbuild.c
*
* MORSE auxiliary routines
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @date 2016-09-05
* @precisions normal z -> s d c
*
**/
#include "control/common.h"
#define A(m, n) A, m, n
/***************************************************************************//**
* Parallel tile matrix generation
*
*******************************************************************************
*
* @param[in] uplo
* Specifies the part of the matrix A to be copied to B.
* = MorseUpperLower: All the matrix A
* = MorseUpper: Upper triangular part
* = MorseLower: Lower triangular part
*
* @param[in] A
* On exit, The matrix A generated.
*
* @param[in] user_data
* The data used in the matrix generation.
*
* @param[in] user_build_callback
* The function called by the codelet to fill the tiles
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************/
void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* user_build_callback,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
MORSE_option_t options;
int m, n;
int ldam;
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
return;
RUNTIME_options_init(&options, morse, sequence, request);
for (m = 0; m < A->mt; m++) {
ldam = BLKLDD(A, m);
for (n = 0; n < A->nt; n++) {
if ( uplo == MorseUpper && m <= n ||
uplo == MorseLower && m >= n ||
uplo == MorseUpperLower)
MORSE_TASK_zbuild(
&options,
A(m, n), ldam,
user_data, user_build_callback );
}
}
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
}
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file zbuild.c
*
* MORSE computational routines
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @date 2016-09-05
* @precisions normal z -> s d c
*
**/
#include "control/common.h"
/***************************************************************************//**
*
* @ingroup MORSE_Complex64_t
*
* MORSE_zbuild - Generate a matrix by calling user provided function.
*
*******************************************************************************
*
* @param[in] uplo
* Specifies the part of the matrix A to be copied to B.
* = MorseUpperLower: All the matrix A
* = MorseUpper: Upper triangular part
* = MorseLower: Lower triangular part
*
* @param[in] M
* The number of rows of A.
*
* @param[in] N
* The order of the matrix A. N >= 0.
*
* @param[out] A
* On exit, The matrix A generated.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
*
* @param[in] user_data
* The user data used in the matrix generation, it will be passed by chameleon
* to the build callback function (see below).
*
* @param[in] user_build_callback
* The user function to call to generate tiles.
* The prototype of the callback is :
* void myFcn(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data)
* It is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
* (with both min and max values included in the intervals,
* index start at 0 like in C, NOT 1 like in Fortran)
* and store it at the adresse 'buffer' with leading dimension 'ld'
* The argument 'user_data' is an opaque pointer on any user data, it is passed by
* the user to Morse_zbuild (see above) and transmitted by chameleon to the callback.
*
*******************************************************************************
*
* @return
* \retval MORSE_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*******************************************************************************
*
* @sa MORSE_zbuild_Tile
* @sa MORSE_zbuild_Tile_Async
* @sa MORSE_cbuild
* @sa MORSE_dbuild
* @sa MORSE_sbuild
*
******************************************************************************/
int MORSE_zbuild( MORSE_enum uplo, int M, int N,
MORSE_Complex64_t *A, int LDA,
void *user_data, void* user_build_callback)
{
int NB;
int status;
MORSE_context_t *morse;
MORSE_sequence_t *sequence = NULL;
MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
MORSE_desc_t descA;
morse = morse_context_self();
if (morse == NULL) {
morse_fatal_error("MORSE_zbuild", "MORSE not initialized");
return MORSE_ERR_NOT_INITIALIZED;
}
/* Check input arguments */
if (M < 0) {
morse_error("MORSE_zbuild", "illegal value of M");
return -1;
}
if (N < 0) {
morse_error("MORSE_zbuild", "illegal value of N");
return -2;
}
if (LDA < max(1, M)) {
morse_error("MORSE_zbuild", "illegal value of LDA");
return -4;
}
/* Quick return */
if (min(M, N) == 0)
return MORSE_SUCCESS;
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = morse_tune(MORSE_FUNC_ZGEMM, M, N, 0);
if (status != MORSE_SUCCESS) {
morse_error("MORSE_zbuild", "morse_tune() failed");
return status;
}
/* Set NT */
NB = MORSE_NB;
morse_sequence_create(morse, &sequence);
morse_zdesc_alloc(descA, NB, NB, LDA, N, 0, 0, M, N, morse_desc_mat_free(&descA));
/* Call the tile interface */
MORSE_zbuild_Tile_Async(uplo, &descA, user_data, user_build_callback, sequence, &request );
morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request);
morse_sequence_wait(morse, sequence);
morse_desc_mat_free(&descA);
status = sequence->status;
morse_sequence_destroy(morse, sequence);
return status;
}
/***************************************************************************//**
*
* @ingroup MORSE_Complex64_t_Tile
*
* MORSE_zbuild_Tile - Generate a matrix by tiles by calling user provided function.
* Tile equivalent of MORSE_zbuild().
* Operates on matrices stored by tiles.
* All matrices are passed through descriptors.
* All dimensions are taken from the descriptors.
*
*******************************************************************************
*
* @param[in] uplo
* Specifies the part of the matrix A to be copied to B.
* = MorseUpperLower: All the matrix A
* = MorseUpper: Upper triangular part
* = MorseLower: Lower triangular part
*
* @param[in] A
* On exit, The matrix A generated.
*
* @param[in] user_data
* The data used in the matrix generation.
*
* @param[in] user_build_callback
* The function called by the codelet to fill the tiles (see MORSE_zbuild)
*
*******************************************************************************
*
* @return
* \retval MORSE_SUCCESS successful exit
*
*******************************************************************************
*
* @sa MORSE_zbuild
* @sa MORSE_zbuild_Tile_Async
* @sa MORSE_cbuild_Tile
* @sa MORSE_dbuild_Tile
* @sa MORSE_sbuild_Tile
*
******************************************************************************/
int MORSE_zbuild_Tile( MORSE_enum uplo, MORSE_desc_t *A,
void *user_data, void* user_build_callback )
{
MORSE_context_t *morse;
MORSE_sequence_t *sequence = NULL;
MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
int status;
morse = morse_context_self();
if (morse == NULL) {
morse_fatal_error("MORSE_zbuild_Tile", "MORSE not initialized");
return MORSE_ERR_NOT_INITIALIZED;
}
morse_sequence_create(morse, &sequence);
MORSE_zbuild_Tile_Async( uplo, A, user_data, user_build_callback, sequence, &request );
morse_sequence_wait(morse, sequence);
status = sequence->status;
morse_sequence_destroy(morse, sequence);
return status;
}
/***************************************************************************//**
*
* @ingroup MORSE_Complex64_t_Tile_Async
*
* MORSE_zbuild_Tile_Async - Generate a matrix by tiles by calling user provided function.
* Non-blocking equivalent of MORSE_zbuild_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] uplo
* Specifies the part of the matrix A to be copied to B.
* = MorseUpperLower: All the matrix A
* = MorseUpper: Upper triangular part
* = MorseLower: Lower triangular part
*
* @param[in] A
* On exit, The matrix A generated.
*
* @param[in] user_data
* The data used in the matrix generation.
*
* @param[in] user_build_callback
* The function called by the codelet to fill the tiles (see MORSE_zbuild)
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa MORSE_zbuild
* @sa MORSE_zbuild_Tile
* @sa MORSE_cbuild_Tile_Async
* @sa MORSE_dbuild_Tile_Async
* @sa MORSE_sbuild_Tile_Async
*
******************************************************************************/
int MORSE_zbuild_Tile_Async( MORSE_enum uplo, MORSE_desc_t *A,
void *user_data, void* user_build_callback,
MORSE_sequence_t *sequence,
MORSE_request_t *request)
{
MORSE_context_t *morse;
morse = morse_context_self();
if (morse == NULL) {
morse_fatal_error("MORSE_zbuild_Tile", "MORSE not initialized");
return MORSE_ERR_NOT_INITIALIZED;
}
if (sequence == NULL) {
morse_fatal_error("MORSE_zbuild_Tile", "NULL sequence");
return MORSE_ERR_UNALLOCATED;
}
if (request == NULL) {
morse_fatal_error("MORSE_zbuild_Tile", "NULL request");
return MORSE_ERR_UNALLOCATED;
}
/* Check sequence status */
if (sequence->status == MORSE_SUCCESS)
request->status = MORSE_SUCCESS;
else
return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
/* Check descriptors for correctness */
if (morse_desc_check(A) != MORSE_SUCCESS) {
morse_error("MORSE_zbuild_Tile", "invalid descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
/* Quick return */
if (min( A->m, A->n ) == 0)
return MORSE_SUCCESS;
morse_pzbuild(uplo, A, user_data, user_build_callback, sequence, request);
return MORSE_SUCCESS;
}
......@@ -154,3 +154,4 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_des
void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* user_build_callback, MORSE_sequence_t *sequence, MORSE_request_t *request );
......@@ -32,6 +32,7 @@ set(LTM_SOURCES
step4.c
step5.c
step6.c
step7.c
)
# Define what libraries we have to link with
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2015 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file step7.c
*
* MORSE example routines
* MORSE is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI,
* University of Bordeaux, Bordeaux INP
*
* @version 1.0.0
* @author Florent Pruvost
* @author Guillaume Sylvand
* @date 2016-09-05
*
**/
#include "step7.h"
/*
* @brief step7 introduces how to use the build interface.
* @details This program is a copy of step6 with some additional calls to
* build a matrix from within chameleon using a function provided by the user.
* This can be seen as a replacement of the function like MORSE_dplgsy_Tile() that can be used
* to fill the matrix with random data, MORSE_dLapack_to_Tile() to fill the matrix
* with data stored in a lapack-like buffer, or MORSE_Desc_Create_User() that can be used
* to describe an arbitrary tile matrix structure.
* In this example, the build callback function are just wrapper towards CORE_xxx() functions, so the output
* of the program step7 should be exactly similar to that of step6.
* The difference is that the funtion used to fill the tiles is provided by the user,
* and therefore this approach is much more flexible.
*/
int main(int argc, char *argv[]) {
size_t N; // matrix order
int NB; // number of rows and columns in tiles
int NRHS; // number of RHS vectors
int NCPU; // number of cores to use
int NGPU; // number of gpus (cuda devices) to use
int GRID_P; // parameter of the 2D block cyclic distribution
int GRID_Q; // parameter of the 2D block cyclic distribution
int NMPIPROC = 1; // number of MPI processus
int UPLO = MorseUpper; // where is stored L
/* descriptors necessary for calling MORSE tile interface */
MORSE_desc_t *descA = NULL, *descAC = NULL, *descB = NULL, *descX = NULL;
/* declarations to time the program and evaluate performances */
double fmuls, fadds, flops, gflops, cpu_time;
/* variable to check the numerical results */
double anorm, bnorm, xnorm, eps, res;
int hres;
/* MORSE sequence uniquely identifies a set of asynchronous function calls
* sharing common exception handling */
MORSE_sequence_t *sequence = NULL;
/* MORSE request uniquely identifies each asynchronous function call */
MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
int status;
/* initialize some parameters with default values */
int iparam[IPARAM_SIZEOF];
memset(iparam, 0, IPARAM_SIZEOF*sizeof(int));
init_iparam(iparam);
/* read arguments */
read_args(argc, argv, iparam);
N = iparam[IPARAM_N];
NB = iparam[IPARAM_NB];
NRHS = iparam[IPARAM_NRHS];
/* compute the algorithm complexity to evaluate performances */
fadds = (double)( FADDS_POTRF(N) + 2 * FADDS_TRSM(N,NRHS) );
fmuls = (double)( FMULS_POTRF(N) + 2 * FMULS_TRSM(N,NRHS) );
flops = 1e-9 * (fmuls + fadds);
gflops = 0.0;
cpu_time = 0.0;
/* initialize the number of thread if not given by the user in argv
* It makes sense only if this program is linked with pthread and
* multithreaded BLAS and LAPACK */
if ( iparam[IPARAM_THRDNBR] == -1 ) {
get_thread_count( &(iparam[IPARAM_THRDNBR]) );
/* reserve one thread par cuda device to optimize memory transfers */
iparam[IPARAM_THRDNBR] -= iparam[IPARAM_NCUDAS];
}
NCPU = iparam[IPARAM_THRDNBR];
NGPU = iparam[IPARAM_NCUDAS];
/* Initialize MORSE with main parameters */
if ( MORSE_Init( NCPU, NGPU ) != MORSE_SUCCESS ) {
fprintf(stderr, "Error initializing MORSE library\n");
return EXIT_FAILURE;
}
/* set some specific parameters related to MORSE: blocks size and inner-blocking size */
MORSE_Set(MORSE_TILE_SIZE, iparam[IPARAM_NB] );
MORSE_Set(MORSE_INNER_BLOCK_SIZE, iparam[IPARAM_IB] );
#if defined(CHAMELEON_USE_MPI)
MORSE_Comm_size( &NMPIPROC );
/* Check P */
if ( (iparam[IPARAM_P] > 1) &&
(NMPIPROC % iparam[IPARAM_P] != 0) ) {
fprintf(stderr, "ERROR: %d doesn't divide the number of MPI processus %d\n",
iparam[IPARAM_P], NMPIPROC );
return EXIT_FAILURE;
}
#endif
iparam[IPARAM_Q] = NMPIPROC / iparam[IPARAM_P];
iparam[IPARAM_NMPI] = NMPIPROC;
GRID_P = iparam[IPARAM_P];
GRID_Q = iparam[IPARAM_Q];
if ( MORSE_My_Mpi_Rank() == 0 ){
/* print informations to user */
print_header( argv[0], iparam);
}
/* Initialize the structure required for MORSE tile interface */
MORSE_Desc_Create(&descA, NULL, MorseRealDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N,
GRID_P, GRID_Q);
MORSE_Desc_Create(&descB, NULL, MorseRealDouble,
NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS,
GRID_P, GRID_Q);
MORSE_Desc_Create(&descX, NULL, MorseRealDouble,
NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS,
GRID_P, GRID_Q);
MORSE_Desc_Create(&descAC, NULL, MorseRealDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N,
GRID_P, GRID_Q);
/* generate A matrix with random values such that it is spd.
We use the callback function Morse_build_callback_plgsy() defined in step7.h
In this example, it is just a wrapper toward CORE_dplgsy() */
struct data_pl data_A={(double)N, 51, N};
MORSE_dbuild_Tile(MorseUpperLower, descA, (void*)&data_A, Morse_build_callback_plgsy);
/* generate RHS with the callback Morse_build_callback_plrnt() */
struct data_pl data_B={0., 5673, N};
MORSE_dbuild_Tile(MorseUpperLower, descB, (void*)&data_B, Morse_build_callback_plrnt);
/* copy A before facto. in order to check the result */
MORSE_dlacpy_Tile(MorseUpperLower, descA, descAC);
/* copy B in X before solving
* same sense as memcpy(X, B, N*NRHS*sizeof(double)) but for descriptors */
MORSE_dlacpy_Tile(MorseUpperLower, descB, descX);
/************************************************************/
/* solve the system AX = B using the Cholesky factorization */
/************************************************************/
cpu_time = -cWtime();
MORSE_Sequence_Create(&sequence);
/* Cholesky factorization:
* A is replaced by its factorization L or L^T depending on uplo */
MORSE_dpotrf_Tile_Async( UPLO, descA, sequence, &request );
/* Solve:
* B is stored in X on entry, X contains the result on exit.
* Forward and back substitutions
*/
MORSE_dpotrs_Tile_Async( UPLO, descA, descX, sequence, &request);
/* Synchronization barrier (the runtime ensures that all submitted tasks
* have been terminated */
MORSE_Sequence_Wait(sequence);
/* Ensure that all data processed on the gpus we are depending on are back
* in main memory */
RUNTIME_desc_getoncpu(descA);
RUNTIME_desc_getoncpu(descX);
status = sequence->status;
if ( status != 0 ) {
fprintf(stderr, "Error in computation (%d)\n", status);
return EXIT_FAILURE;
}
MORSE_Sequence_Destroy(sequence);
cpu_time += cWtime();
/* print informations to user */
gflops = flops / cpu_time;
if ( MORSE_My_Mpi_Rank() == 0 )
printf( "%9.3f %9.2f\n", cpu_time, gflops);
fflush( stdout );
/************************************************************/
/* check if solve is correct i.e. AX-B = 0 */
/************************************************************/
/* compute norms to check the result */
anorm = MORSE_dlange_Tile( MorseInfNorm, descAC);
bnorm = MORSE_dlange_Tile( MorseInfNorm, descB);
xnorm = MORSE_dlange_Tile( MorseInfNorm, descX);
/* compute A*X-B, store the result in B */
MORSE_dgemm_Tile( MorseNoTrans, MorseNoTrans,
1.0, descAC, descX, -1.0, descB );
res = MORSE_dlange_Tile( MorseInfNorm, descB );
/* check residual and print a message */
eps = LAPACKE_dlamch_work( 'e' );
/*
* if hres = 0 then the test succeed
* else the test failed
*/
hres = 0;
hres = ( res / N / eps / (anorm * xnorm + bnorm ) > 100.0 );
if ( MORSE_My_Mpi_Rank() == 0 ){
printf( " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN\n");
if (hres)
printf( "%8.5e %8.5e %8.5e %8.5e %8.5e FAILURE \n",
res, anorm, xnorm, bnorm,
res / N / eps / (anorm * xnorm + bnorm ));
else
printf( "%8.5e %8.5e %8.5e %8.5e %8.5e SUCCESS \n",
res, anorm, xnorm, bnorm,
res / N / eps / (anorm * xnorm + bnorm ));
}
/* deallocate A, B, X, Acpy and associated descriptors descA, ... */
MORSE_Desc_Destroy( &descA );
MORSE_Desc_Destroy( &descB );
MORSE_Desc_Destroy( &descX );
MORSE_Desc_Destroy( &descAC );
/* Finalize MORSE */
MORSE_Finalize();
return EXIT_SUCCESS;
}
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file step7.h
*
* MORSE example routines
* MORSE is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI,
* University of Bordeaux, Bordeaux INP
*
* @version 1.0.0
* @author Florent Pruvost
* @author Guillaume Sylvand
* @date 2016-09-05
*
**/
#ifndef step7_H
#define step7_H
/* Common include for all steps of the tutorial */
#include "lapack_to_morse.h"
/* Specific includes for step 6 */
#include <coreblas/include/lapacke.h>
#include <morse.h>
#if defined(CHAMELEON_USE_MPI)
#include <mpi.h>
#endif
/* Integer parameters for step7 */
enum iparam_step7 {
IPARAM_THRDNBR, /* Number of cores */
IPARAM_NCUDAS, /* Number of cuda devices */
IPARAM_NMPI, /* Number of cuda devices */
IPARAM_N, /* Number of columns of the matrix */
IPARAM_NB, /* Number of columns in a tile */
IPARAM_IB, /* Inner-blocking size */
IPARAM_NRHS, /* Number of RHS */
IPARAM_P, /* 2D block cyclic distribution parameter MB */
IPARAM_Q, /* 2D block cyclic distribution parameter NB */
/* End */
IPARAM_SIZEOF
};
/* Specific routines used in step7.c main program */
/******************************************************************************
* Initialize integer parameters
*/
static void init_iparam(int iparam[IPARAM_SIZEOF]){
iparam[IPARAM_THRDNBR ] = -1;
iparam[IPARAM_NCUDAS ] = 0;
iparam[IPARAM_NMPI ] = 1;
iparam[IPARAM_N ] = 500;
iparam[IPARAM_NB ] = 128;
iparam[IPARAM_IB ] = 32;
iparam[IPARAM_NRHS ] = 1;
iparam[IPARAM_P ] = 1;
iparam[IPARAM_Q ] = 1;
}
/******************************************************************************
* Callback function used to build matrix blocks
* Morse_build_callback_plgsy : random symmetric positive definite
* Morse_build_callback_plrnt : random
* These 2 functions use data_pl to get data on the matrix to build, passed through the opaque pointer 'user_data'
* The callback is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
* (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
* and store it at the adresse 'buffer' with leading dimension 'ld'
*/
struct data_pl {
double bump;
unsigned long long int seed;
int bigM;
};
static void Morse_build_callback_plgsy(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) {
struct data_pl *data=(struct data_pl *)user_data;
CORE_dplgsy(data->bump, row_max-row_min+1, col_max-col_min+1, buffer, ld, data->bigM, row_min, col_min, data->seed);
}
static void Morse_build_callback_plrnt(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) {
struct data_pl *data=(struct data_pl *)user_data;
CORE_dplrnt(row_max-row_min+1, col_max-col_min+1, buffer, ld, data->bigM, row_min, col_min, data->seed);
}
/******************************************************************************
* Print how to use the program
*/
static void show_help(char *prog_name) {
printf( "Usage:\n%s [options]\n\n", prog_name );
printf( "Options are:\n"
" --help Show this help\n"
"\n"
" --n=X dimension (N). (default: 500)\n"
" --nb=X NB size. (default: 128)\n"
" --ib=X IB size. (default: 32)\n"
" --nrhs=X number of RHS. (default: 1)\n"
" --p=X 2D block cyclic distribution parameter MB. (default: 1)\n"
"\n"
" --threads=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n"
" --gpus=X Number of GPU workers (default: 0)\n"
"\n");
}
/******************************************************************************
* Read arguments following step7 program call
*/
static void read_args(int argc, char *argv[], int *iparam){
int i;
for (i = 1; i < argc && argv[i]; ++i) {
if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") ||
startswith( argv[i], "--h") || startswith( argv[i], "-h") ) {
show_help( argv[0] );
exit(0);
} else if (startswith( argv[i], "--n=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_N]) );
} else if (startswith( argv[i], "--nb=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) );
} else if (startswith( argv[i], "--ib=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_IB]) );
} else if (startswith( argv[i], "--nrhs=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NRHS]) );
} else if (startswith( argv[i], "--p=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_P]) );
} else if (startswith( argv[i], "--threads=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) );
} else if (startswith( argv[i], "--gpus=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NCUDAS]) );
} else {
fprintf( stderr, "Unknown option: %s\n", argv[i] );
}
}
}
/******************************************************************************
* Print a header message to summarize main parameters
*/
static void print_header(char *prog_name, int * iparam) {
#if defined(CHAMELEON_SIMULATION)
double eps = 0.;
#else
double eps = LAPACKE_dlamch_work( 'e' );
#endif
printf( "#\n"
"# CHAMELEON %d.%d.%d, %s\n"
"# Nb threads: %d\n"
"# Nb gpus: %d\n"
"# Nb mpi: %d\n"
"# PxQ: %dx%d\n"
"# N: %d\n"
"# NB: %d\n"
"# IB: %d\n"
"# eps: %e\n"
"#\n",
CHAMELEON_VERSION_MAJOR,
CHAMELEON_VERSION_MINOR,
CHAMELEON_VERSION_MICRO,
prog_name,
iparam[IPARAM_THRDNBR],
iparam[IPARAM_NCUDAS],
iparam[IPARAM_NMPI],
iparam[IPARAM_P], iparam[IPARAM_Q],
iparam[IPARAM_N],
iparam[IPARAM_NB],
iparam[IPARAM_IB],
eps );
printf( "# M N K/NRHS seconds Gflop/s\n");
printf( "#%7d %7d %7d ", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_NRHS]);
fflush( stdout );
return;
}
#endif /* step7_H */
......@@ -309,6 +309,13 @@ int MORSE_zTile_to_Lapack(MORSE_desc_t *A, MORSE_Complex64_t *Af77, int LDA);
int MORSE_zLapack_to_Tile_Async(MORSE_Complex64_t *Af77, int LDA, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zTile_to_Lapack_Async(MORSE_desc_t *A, MORSE_Complex64_t *Af77, int LDA, MORSE_sequence_t *sequence, MORSE_request_t *request);
/** ****************************************************************************
* User Builder function prototypes
**/
int MORSE_zbuild(MORSE_enum uplo, int M, int N, MORSE_Complex64_t *A, int LDA, void *user_data, void* user_build_callback);
int MORSE_zbuild_Tile(MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* user_build_callback );
int MORSE_zbuild_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* user_build_callback, MORSE_sequence_t *sequence, MORSE_request_t *request);
#ifdef __cplusplus
}
#endif
......
......@@ -451,7 +451,9 @@ void MORSE_TASK_zunmqr(MORSE_option_t *options,
MORSE_desc_t *A, int Am, int An, int lda,
MORSE_desc_t *T, int Tm, int Tn, int ldt,
MORSE_desc_t *C, int Cm, int Cn, int ldc);
void MORSE_TASK_zbuild( MORSE_option_t *options,
MORSE_desc_t *A, int Am, int An, int lda,
void *user_data, void* user_build_callback );
#ifdef __cplusplus
......
......@@ -148,6 +148,10 @@ set(ZSRC
codelets/codelet_zttqrt.c
codelets/codelet_zunmlq.c
codelets/codelet_zunmqr.c
##################
# BUILD
##################
codelets/codelet_zbuild.c
)
list(REMOVE_DUPLICATES ZSRC)
......
/**
*
* @copyright (c) 2009-2015 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2015 Inria. All rights reserved.
* @copyright (c) 2012-2015 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
* @file codelet_zbuild.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @author Reazul Hoque
* @author Guillaume Sylvand
* @date 2016-09-05
* @precisions normal z -> c d s
*
**/
#include "runtime/parsec/include/morse_parsec.h"
static int
CORE_zbuild_parsec(dague_execution_unit_t *context, dague_execution_context_t *this_task)
{
MORSE_Complex64_t *A;
int *lda;
void *user_data;
void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
int row_min, row_max, col_min, col_max;
dague_dtd_unpack_args(this_task,
UNPACK_VALUE, &row_min,
UNPACK_VALUE, &row_max,
UNPACK_VALUE, &col_min,
UNPACK_VALUE, &col_max,
UNPACK_DATA, &A,
UNPACK_VALUE, &lda,
UNPACK_VALUE, &user_data,
UNPACK_VALUE, &user_build_callback
);
user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data);
return 0;
}
void MORSE_TASK_zbuild( MORSE_option_t *options,
MORSE_desc_t *A, int Am, int An, int lda,
void *user_data, void* user_build_callback )
{
dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt);
int row_min, row_max, col_min, col_max;
row_min = Am*A->mb ;
row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ;
col_min = An*A->nb ;
col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ;
insert_task_generic_fptr(DAGUE_dtd_handle, CORE_zbuild_parsec, "zbuild",
sizeof(int), &row_min, VALUE,
sizeof(int), &row_max, VALUE,
sizeof(int), &col_min, VALUE,
sizeof(int), &col_max, VALUE,
PASSED_BY_REF, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), OUTPUT | REGION_FULL,
sizeof(int), &lda, VALUE,
sizeof(void*), &user_data, VALUE,
sizeof(void*), &user_build_callback, VALUE
0);
}
......@@ -146,6 +146,10 @@ set(ZSRC
codelets/codelet_zttqrt.c
codelets/codelet_zunmlq.c
codelets/codelet_zunmqr.c
##################
# BUILD
##################
codelets/codelet_zbuild.c
)
precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_zbuild.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Piotr Luszczek
* @author Pierre Lemarinier
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @date 2016-09-05
* @precisions normal z -> c d s
*
**/
#include "runtime/quark/include/morse_quark.h"
void MORSE_TASK_zbuild( MORSE_option_t *options,
MORSE_desc_t *A, int Am, int An, int lda,
void *user_data, void* user_build_callback )
{
quark_option_t *opt = (quark_option_t*)(options->schedopt);
DAG_CORE_BUILD;
int row_min, row_max, col_min, col_max;
row_min = Am*A->mb ;
row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ;
col_min = An*A->nb ;
col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ;
QUARK_Insert_Task(opt->quark, CORE_zbuild_quark, (Quark_Task_Flags*)opt,
sizeof(int), &row_min, VALUE,
sizeof(int), &row_max, VALUE,
sizeof(int), &col_min, VALUE,
sizeof(int), &col_max, VALUE,
sizeof(MORSE_Complex64_t)*lda*n, RTBLKADDR(A, MORSE_Complex64_t, Am, An), OUTPUT,
sizeof(int), &lda, VALUE,
sizeof(void*), &user_data, VALUE,
sizeof(void*), &user_build_callback, VALUE,
0);
}
void CORE_zbuild_quark(Quark *quark)
{
MORSE_Complex64_t *A;
int lda;
void *user_data;
void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
int row_min, row_max, col_min, col_max;
quark_unpack_args_8( quark, row_min, row_max, col_min, col_max, A, lda, user_data, user_build_callback);
user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data);
}
......@@ -132,6 +132,8 @@ void CORE_zgemm_p2_quark(Quark* quark);
void CORE_zgemm_p2f1_quark(Quark* quark);
void CORE_zgemm_p3_quark(Quark* quark);
void CORE_zbuild_quark(Quark *quark);
#ifdef __cplusplus
}
#endif
......
......@@ -166,6 +166,10 @@ set(ZSRC
codelets/codelet_zttqrt.c
codelets/codelet_zunmlq.c
codelets/codelet_zunmqr.c
##################
# BUILD
##################
codelets/codelet_zbuild.c
)
list(REMOVE_DUPLICATES ZSRC)
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_zbuild.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Piotr Luszczek
* @author Pierre Lemarinier
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @date 2016-09-05
* @precisions normal z -> c d s
*
**/
#include "runtime/starpu/include/morse_starpu.h"
#include "runtime/starpu/include/runtime_codelet_z.h"
void MORSE_TASK_zbuild( MORSE_option_t *options,
MORSE_desc_t *A, int Am, int An, int lda,
void *user_data, void* user_build_callback )
{
struct starpu_codelet *codelet = &cl_zbuild;
void (*callback)(void*) = options->profiling ? cl_zbuild_callback : NULL;
int row_min, row_max, col_min, col_max;
if ( morse_desc_islocal( A, Am, An ) )
{
row_min = Am*A->mb ;
row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ;
col_min = An*A->nb ;
col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ;
starpu_insert_task(
codelet,
STARPU_VALUE, &row_min, sizeof(int),
STARPU_VALUE, &row_max, sizeof(int),
STARPU_VALUE, &col_min, sizeof(int),
STARPU_VALUE, &col_max, sizeof(int),
STARPU_W, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &user_data, sizeof(void*),
STARPU_VALUE, &user_build_callback, sizeof(void*),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
0);
}
}
static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
{
int Am;
int An;
MORSE_Complex64_t *A;
int ld;
void *user_data;
void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
int row_min, row_max, col_min, col_max;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback );
/* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
* (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
* and store it at the adresse 'buffer' with leading dimension 'ld'
*/
user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data);
}
/*
* Codelet definition
*/
CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
......@@ -55,6 +55,7 @@ CHAMELEON_CL_CB(zsytrf_nopiv, starpu_matrix_get_nx(task->handles[0]), 0, 0,
#endif
CHAMELEON_CL_CB(zplgsy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N);
CHAMELEON_CL_CB(zplrnt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N);
CHAMELEON_CL_CB(zbuild, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N);
CHAMELEON_CL_CB(zplssq, 1, 1, 0, 4);
CHAMELEON_CL_CB(zplssq2, 1, 1, 0, 1);
CHAMELEON_CL_CB(zpotrf, starpu_matrix_get_nx(task->handles[0]), 0, 0, (1./3.)*M* M*M);
......
......@@ -113,6 +113,7 @@ ZCODELETS_HEADER(asum)
* CPU only functions
*/
ZCODELETS_HEADER(plrnt)
ZCODELETS_HEADER(build)
#if defined(PRECISION_z) || defined(PRECISION_c)
ZCODELETS_HEADER(hessq)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment